20 files changed, 7913 insertions, 0 deletions
diff --git a/ml/dlib/dlib/statistics/average_precision.h b/ml/dlib/dlib/statistics/average_precision.h
new file mode 100644
index 000000000..6c2e7e0b1
--- /dev/null
+++ b/ml/dlib/dlib/statistics/average_precision.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_AVERAGE_PREcISION_Hh_
+#define DLIB_AVERAGE_PREcISION_Hh_
+
+#include "average_precision_abstract.h"
+#include <vector>
+
+
+namespace dlib
+{
+    namespace impl
+    {
+        inline bool get_bool_part (
+            const bool& b
+        ) { return b; }
+
+        template <typename T>
+        bool get_bool_part(const std::pair<T,bool>& item) { return item.second; }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T, typename alloc>
+    double average_precision (
+        const std::vector<T,alloc>& items,
+        unsigned long missing_relevant_items = 0
+    )
+    {
+        using namespace dlib::impl;
+        double relevant_count = 0;
+        // find the precision values
+        std::vector<double> precision;
+        for (unsigned long i = 0; i < items.size(); ++i)
+        {
+            if (get_bool_part(items[i]))
+            {
+                ++relevant_count;
+                precision.push_back(relevant_count / (i+1));
+            }
+        }
+
+        double precision_sum = 0;
+        double max_val = 0;
+        // now sum over the interpolated precision values
+        for (std::vector<double>::reverse_iterator i = precision.rbegin(); i != precision.rend(); ++i)
+        {
+            max_val = std::max(max_val, *i);
+            precision_sum += max_val;
+        }
+
+
+        relevant_count += missing_relevant_items;
+
+        if (relevant_count != 0)
+            return precision_sum/relevant_count;
+        else
+            return 1; 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_AVERAGE_PREcISION_Hh_
+
diff --git a/ml/dlib/dlib/statistics/average_precision_abstract.h b/ml/dlib/dlib/statistics/average_precision_abstract.h
new file mode 100644
index 000000000..76c2c702a
--- /dev/null
+++ b/ml/dlib/dlib/statistics/average_precision_abstract.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
+#ifdef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
+
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename alloc
+        >
+    double average_precision (
+        const std::vector<bool,alloc>& items,
+        unsigned long missing_relevant_items = 0
+    );
+    /*!
+        ensures
+            - Interprets items as a list of relevant and non-relevant items in a response
+              from an information retrieval system.  In particular, items with a true value
+              are relevant and false items are non-relevant.  This function then returns
+              the average precision of the ranking of the given items.  For, example, the
+              ranking [true, true, true, true, false] would have an average precision of 1.
+              On the other hand, the ranking of [true false false true] would have an
+              average precision of 0.75 (because the first true has a precision of 1 and
+              the second true has a precision of 0.5, giving an average of 0.75).
+            - As a special case, if item contains no true elements then the average
+              precision is considered to be 1.
+            - Note that we use the interpolated precision.  That is, the interpolated
+              precision at a recall value r is set to the maximum precision obtained at any
+              higher recall value.  Or in other words, we interpolate the precision/recall
+              curve so that precision is monotonically decreasing.  Therefore, the average
+              precision value returned by this function is the area under this interpolated
+              precision/recall curve.
+            - This function will add in missing_relevant_items number of items with a
+              precision of zero into the average value returned.  For example, the average
+              precision of the ranking [true, true] if there are 2 missing relevant items
+              is 0.5.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double average_precision (
+        const std::vector<std::pair<T,bool>,alloc>& items,
+        unsigned long missing_relevant_items = 0
+    );
+    /*!
+        ensures
+            - this function is equivalent to copying the bool values from items into a
+              std::vector<bool> and then calling the above average_precision() routine on
+              it and returning the result. 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/cca.h b/ml/dlib/dlib/statistics/cca.h
new file mode 100644
index 000000000..21300ea8f
--- /dev/null
+++ b/ml/dlib/dlib/statistics/cca.h
@@ -0,0 +1,186 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_CCA_hh_
+#define DLIB_CCA_hh_
+
+#include "cca_abstract.h"
+#include "../algs.h"
+#include "../matrix.h"
+#include "../sparse_vector.h"
+#include "random_subset_selector.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<typename T::type,0,1> compute_correlations (
+        const matrix_exp<T>& L,
+        const matrix_exp<T>& R
+    )
+    {
+        DLIB_ASSERT( L.size() > 0 && R.size() > 0 && L.nr() == R.nr(), 
+            "\t matrix compute_correlations()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t L.size(): " << L.size()
+            << "\n\t R.size(): " << R.size()
+            << "\n\t L.nr():   " << L.nr()
+            << "\n\t R.nr():   " << R.nr()
+            );
+
+        typedef typename T::type type;
+        matrix<type> A, B, C;
+        A = diag(trans(R)*L);
+        B = sqrt(diag(trans(L)*L));
+        C = sqrt(diag(trans(R)*R));
+        A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C)));
+        return A;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type, 
+        typename T
+        >
+    matrix<T,0,1> impl_cca (
+        const matrix_type& L,
+        const matrix_type& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank,
+        unsigned long q,
+        unsigned long num_output_correlations,
+        double regularization
+    )
+    {
+        matrix<T> Ul, Vl;
+        matrix<T> Ur, Vr;
+        matrix<T> U, V;
+        matrix<T,0,1> Dr, Dl, D;
+
+
+        // Note that we add a few more singular vectors in because it helps improve the
+        // final results if we run this part with a little higher rank than the final SVD.
+        svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q);
+        svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q);
+
+        // Zero out singular values that are essentially zero so they don't cause numerical
+        // difficulties in the code below.
+        const double eps = std::numeric_limits<T>::epsilon()*std::max(max(Dr),max(Dl))*100;
+        Dl = round_zeros(Dl+regularization,eps);
+        Dr = round_zeros(Dr+regularization,eps);
+
+        // This matrix is really small so we can do a normal full SVD on it.  Note that we
+        // also throw away the columns of Ul and Ur corresponding to zero singular values.
+        svd3(diagm(Dl>0)*tmp(trans(Ul)*Ur)*diagm(Dr>0), U, D, V);
+
+        // now throw away extra columns of the transformations.  We do this in a way
+        // that keeps the directions that have the highest correlations.
+        matrix<T,0,1> temp = D;
+        rsort_columns(U, temp);
+        rsort_columns(V, D);
+        U = colm(U, range(0, num_output_correlations-1));
+        V = colm(V, range(0, num_output_correlations-1));
+        D = rowm(D, range(0, num_output_correlations-1));
+
+        Ltrans = Vl*inv(diagm(Dl))*U;
+        Rtrans = Vr*inv(diagm(Dr))*V;
+
+        // Note that the D matrix contains the correlation values for the transformed
+        // vectors.  However, when the L and R matrices have rank higher than
+        // num_correlations+extra_rank then the values in D become only approximate.
+        return D; 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T>
+    matrix<T,0,1> cca (
+        const matrix<T>& L,
+        const matrix<T>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2,
+        double regularization = 0
+    )
+    {
+        DLIB_ASSERT( num_correlations > 0 && L.size() > 0 && R.size() > 0 && L.nr() == R.nr() &&
+            regularization >= 0, 
+            "\t matrix cca()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t num_correlations: " << num_correlations 
+            << "\n\t regularization:   " << regularization 
+            << "\n\t L.size(): " << L.size()
+            << "\n\t R.size(): " << R.size()
+            << "\n\t L.nr():   " << L.nr()
+            << "\n\t R.nr():   " << R.nr()
+            );
+
+        using std::min;
+        const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc())));
+        return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n, regularization); 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename sparse_vector_type, typename T>
+    matrix<T,0,1> cca (
+        const std::vector<sparse_vector_type>& L,
+        const std::vector<sparse_vector_type>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2,
+        double regularization = 0
+    )
+    {
+        DLIB_ASSERT( num_correlations > 0 && L.size() == R.size() && 
+                     max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 &&
+                     regularization >= 0, 
+            "\t matrix cca()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t num_correlations: " << num_correlations 
+            << "\n\t regularization:   " << regularization 
+            << "\n\t L.size(): " << L.size()
+            << "\n\t R.size(): " << R.size()
+            << "\n\t max_index_plus_one(L):   " << max_index_plus_one(L)
+            << "\n\t max_index_plus_one(R):   " << max_index_plus_one(R)
+            );
+
+        using std::min;
+        const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R));
+        const unsigned long num_output_correlations = min(num_correlations, std::min<unsigned long>(R.size(),n));
+        return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations, regularization); 
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename sparse_vector_type, typename Rand_type, typename T>
+    matrix<T,0,1> cca (
+        const random_subset_selector<sparse_vector_type,Rand_type>& L,
+        const random_subset_selector<sparse_vector_type,Rand_type>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2
+    )
+    {
+        return cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CCA_hh_
+
+
diff --git a/ml/dlib/dlib/statistics/cca_abstract.h b/ml/dlib/dlib/statistics/cca_abstract.h
new file mode 100644
index 000000000..8e0b4e742
--- /dev/null
+++ b/ml/dlib/dlib/statistics/cca_abstract.h
@@ -0,0 +1,191 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_CCA_AbSTRACT_Hh_
+#ifdef DLIB_CCA_AbSTRACT_Hh_
+
+#include "../matrix/matrix_la_abstract.h"
+#include "random_subset_selector_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<typename T::type,0,1> compute_correlations (
+        const matrix_exp<T>& L,
+        const matrix_exp<T>& R
+    );
+    /*!
+        requires
+            - L.size() > 0 
+            - R.size() > 0 
+            - L.nr() == R.nr()
+        ensures
+            - This function treats L and R as sequences of paired row vectors.  It
+              then computes the correlation values between the elements of these 
+              row vectors.  In particular, we return a vector COR such that:
+                - COR.size() == L.nc()
+                - for all valid i:
+                    - COR(i) == the correlation coefficient between the following sequence
+                      of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr().
+                      Therefore, COR(i) is a value between -1 and 1 inclusive where 1
+                      indicates perfect correlation and -1 perfect anti-correlation.  Note
+                      that this function assumes the input data vectors have been centered
+                      (i.e. made to have zero mean).  If this is not the case then it will
+                      report inaccurate results.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    matrix<T,0,1> cca (
+        const matrix<T>& L,
+        const matrix<T>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2,
+        double regularization = 0
+    );
+    /*!
+        requires
+            - num_correlations > 0
+            - L.size() > 0 
+            - R.size() > 0 
+            - L.nr() == R.nr()
+            - regularization >= 0
+        ensures
+            - This function performs a canonical correlation analysis between the row
+              vectors in L and R.  That is, it finds two transformation matrices, Ltrans
+              and Rtrans, such that row vectors in the transformed matrices L*Ltrans and
+              R*Rtrans are as correlated as possible.  That is, we try to find two transforms
+              such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans)
+              would be maximized.
+            - Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc())))
+              (This is the actual number of elements in the transformed vectors.
+              Therefore, note that you can't get more outputs than there are rows or
+              columns in the input matrices.)
+            - #Ltrans.nr() == L.nc()
+            - #Ltrans.nc() == N 
+            - #Rtrans.nr() == R.nc()
+            - #Rtrans.nc() == N 
+            - This function assumes the data vectors in L and R have already been centered
+              (i.e. we assume the vectors have zero means).  However, in many cases it is
+              fine to use uncentered data with cca().  But if it is important for your
+              problem then you should center your data before passing it to cca().
+            - This function works with reduced rank approximations of the L and R matrices.
+              This makes it fast when working with large matrices.  In particular, we use
+              the svd_fast() routine to find reduced rank representations of the input
+              matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) 
+              and similarly for R.  This means that you can use the extra_rank and q
+              arguments to cca() to influence the accuracy of the reduced rank
+              approximation.  However, the default values should work fine for most
+              problems.
+            - returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans).  The
+              returned vector should exactly match the output of compute_correlations()
+              when the reduced rank approximation to L and R is accurate and regularization
+              is set to 0.  However, if this is not the case then the return value of this
+              function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans).  This
+              deviation can be used to check if the reduced rank approximation is working
+              or you need to increase extra_rank.
+            - The dimensions of the output vectors produced by L*#Ltrans or R*#Rtrans are
+              ordered such that the dimensions with the highest correlations come first.
+              That is, after applying the transforms produced by cca() to a set of vectors
+              you will find that dimension 0 has the highest correlation, then dimension 1
+              has the next highest, and so on.  This also means that the list of numbers
+              returned from cca() will always be listed in decreasing order.
+            - This function performs the ridge regression version of Canonical Correlation
+              Analysis when regularization is set to a value > 0.  In particular, larger
+              values indicate the solution should be more heavily regularized.  This can be
+              useful when the dimensionality of the data is larger than the number of
+              samples.
+            - A good discussion of CCA can be found in the paper "Canonical Correlation
+              Analysis" by David Weenink.  In particular, this function is implemented
+              using equations 29 and 30 from his paper.  We also use the idea of doing CCA
+              on a reduced rank approximation of L and R as suggested by Paramveer S.
+              Dhillon in his paper "Two Step CCA: A new spectral method for estimating
+              vector models of words".
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename sparse_vector_type, 
+        typename T
+        >
+    matrix<T,0,1> cca (
+        const std::vector<sparse_vector_type>& L,
+        const std::vector<sparse_vector_type>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2,
+        double regularization = 0
+    );
+    /*!
+        requires
+            - num_correlations > 0
+            - L.size() == R.size()
+            - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
+              (i.e. L and R can't represent empty matrices)
+            - L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h
+              for a definition of sparse vector)
+            - regularization >= 0
+        ensures
+            - This is just an overload of the cca() function defined above.  Except in this
+              case we take a sparse representation of the input L and R matrices rather than
+              dense matrices.  Therefore, in this case, we interpret L and R as matrices
+              with L.size() rows, where each row is defined by a sparse vector.  So this 
+              function does exactly the same thing as the above cca().
+            - Note that you can apply the output transforms to a sparse vector with the
+              following code:
+                sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename sparse_vector_type, 
+        typename Rand_type, 
+        typename T
+        >
+    matrix<T,0,1> cca (
+        const random_subset_selector<sparse_vector_type,Rand_type>& L,
+        const random_subset_selector<sparse_vector_type,Rand_type>& R,
+        matrix<T>& Ltrans,
+        matrix<T>& Rtrans,
+        unsigned long num_correlations,
+        unsigned long extra_rank = 5,
+        unsigned long q = 2,
+        double regularization = 0
+    );
+    /*!
+        requires
+            - num_correlations > 0
+            - L.size() == R.size()
+            - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0
+              (i.e. L and R can't represent empty matrices)
+            - L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h
+              for a definition of sparse vector)
+            - regularization >= 0
+        ensures
+            - returns cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q)
+              (i.e. this is just a convenience function for calling the cca() routine when
+              your sparse vectors are contained inside a random_subset_selector rather than
+              a std::vector)
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_CCA_AbSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/dpca.h b/ml/dlib/dlib/statistics/dpca.h
new file mode 100644
index 000000000..cae784682
--- /dev/null
+++ b/ml/dlib/dlib/statistics/dpca.h
@@ -0,0 +1,541 @@
+// Copyright (C) 2009  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_DPCA_h_
+#define DLIB_DPCA_h_
+
+#include "dpca_abstract.h"
+#include <limits>
+#include <cmath>
+#include "../algs.h"
+#include "../matrix.h"
+#include <iostream>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class discriminant_pca
+    {
+        /*!
+            INITIAL VALUE
+                - vect_size == 0
+                - total_count == 0
+                - between_count == 0
+                - within_count == 0
+                - between_weight == 1
+                - within_weight == 1
+
+            CONVENTION
+                - vect_size == in_vector_size()
+                - total_count == the number of times add_to_total_variance() has been called.
+                - within_count == the number of times add_to_within_class_variance() has been called.
+                - between_count == the number of times add_to_between_class_variance() has been called.
+                - between_weight == between_class_weight()
+                - within_weight == within_class_weight()
+
+                - if (total_count != 0)
+                    - total_sum == the sum of all vectors given to add_to_total_variance()
+                    - the covariance of all the elements given to add_to_total_variance() is given
+                      by:
+                        - let avg == total_sum/total_count
+                        - covariance == total_cov/total_count - avg*trans(avg)
+                - if (within_count != 0)
+                    - within_cov/within_count == the normalized within class scatter matrix  
+                - if (between_count != 0)
+                    - between_cov/between_count == the normalized between class scatter matrix  
+        !*/
+
+    public:
+
+        struct discriminant_pca_error : public error
+        {
+            discriminant_pca_error(const std::string& message): error(message) {}
+        };
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        discriminant_pca (
+        ) 
+        {
+            clear();
+        }
+
+        void clear(
+        )
+        {
+            total_count = 0;
+            between_count = 0;
+            within_count = 0;
+
+            vect_size = 0;
+
+
+            between_weight = 1;
+            within_weight = 1;
+
+
+            total_sum.set_size(0);
+            between_cov.set_size(0,0);
+            total_cov.set_size(0,0);
+            within_cov.set_size(0,0);
+        }
+
+        long in_vector_size (
+        ) const
+        {
+            return vect_size;
+        }
+
+        void set_within_class_weight (
+            scalar_type weight
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(weight >= 0,
+                "\t void discriminant_pca::set_within_class_weight()"
+                << "\n\t You can't use negative weight values"
+                << "\n\t weight: " << weight 
+                << "\n\t this:   " << this
+                );
+
+            within_weight = weight;
+        }
+
+        scalar_type within_class_weight (
+        ) const
+        {
+            return within_weight;
+        }
+
+        void set_between_class_weight (
+            scalar_type weight
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(weight >= 0,
+                "\t void discriminant_pca::set_between_class_weight()"
+                << "\n\t You can't use negative weight values"
+                << "\n\t weight: " << weight 
+                << "\n\t this:   " << this
+                );
+
+            between_weight = weight;
+        }
+
+        scalar_type between_class_weight (
+        ) const
+        {
+            return between_weight;
+        }
+
+        template <typename EXP1, typename EXP2>
+        void add_to_within_class_variance(
+            const matrix_exp<EXP1>& x,
+            const matrix_exp<EXP2>& y
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && 
+                         x.size() == y.size() &&
+                         (in_vector_size() == 0 || x.size() == in_vector_size()),
+                "\t void discriminant_pca::add_to_within_class_variance()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t is_col_vector(y): " << is_col_vector(y) 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t y.size():         " << y.size() 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
+            vect_size = x.size();
+            if (within_count == 0)
+            {
+                within_cov = (x-y)*trans(x-y);
+            }
+            else
+            {
+                within_cov += (x-y)*trans(x-y);
+            }
+            ++within_count;
+        }
+
+        template <typename EXP1, typename EXP2>
+        void add_to_between_class_variance(
+            const matrix_exp<EXP1>& x,
+            const matrix_exp<EXP2>& y
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && 
+                         x.size() == y.size() &&
+                         (in_vector_size() == 0 || x.size() == in_vector_size()),
+                "\t void discriminant_pca::add_to_between_class_variance()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t is_col_vector(y): " << is_col_vector(y) 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t y.size():         " << y.size() 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
+            vect_size = x.size();
+            if (between_count == 0)
+            {
+                between_cov = (x-y)*trans(x-y);
+            }
+            else
+            {
+                between_cov += (x-y)*trans(x-y);
+            }
+            ++between_count;
+        }
+
+        template <typename EXP>
+        void add_to_total_variance(
+            const matrix_exp<EXP>& x
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()),
+                "\t void discriminant_pca::add_to_total_variance()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t this:             " << this
+                );
+
+            vect_size = x.size();
+            if (total_count == 0)
+            {
+                total_cov = x*trans(x);
+                total_sum = x;
+            }
+            else
+            {
+                total_cov += x*trans(x);
+                total_sum += x;
+            }
+            ++total_count;
+        }
+
+        const general_matrix dpca_matrix (
+            const double eps = 0.99
+        ) const
+        {
+            general_matrix dpca_mat;
+            general_matrix eigenvalues;
+            dpca_matrix(dpca_mat, eigenvalues, eps);
+            return dpca_mat;
+        }
+
+        const general_matrix dpca_matrix_of_size (
+            const long num_rows 
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
+                "\t general_matrix discriminant_pca::dpca_matrix_of_size()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t num_rows:         " << num_rows 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
+            general_matrix dpca_mat;
+            general_matrix eigenvalues;
+            dpca_matrix_of_size(dpca_mat, eigenvalues, num_rows);
+            return dpca_mat;
+        }
+
+        void dpca_matrix (
+            general_matrix& dpca_mat,
+            general_matrix& eigenvalues,
+            const double eps = 0.99
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0,
+                "\t void discriminant_pca::dpca_matrix()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t eps:              " << eps 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
+            compute_dpca_matrix(dpca_mat, eigenvalues, eps, 0);
+        }
+
+        void dpca_matrix_of_size (
+            general_matrix& dpca_mat,
+            general_matrix& eigenvalues,
+            const long num_rows 
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
+                "\t general_matrix discriminant_pca::dpca_matrix_of_size()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t num_rows:         " << num_rows 
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t this:             " << this
+                );
+
+            compute_dpca_matrix(dpca_mat, eigenvalues, 1, num_rows);
+        }
+
+        void swap (
+            discriminant_pca& item
+        )
+        {
+            using std::swap;
+            swap(total_cov, item.total_cov);
+            swap(total_sum, item.total_sum);
+            swap(total_count, item.total_count);
+            swap(vect_size, item.vect_size);
+            swap(between_cov, item.between_cov);
+
+            swap(between_count, item.between_count);
+            swap(between_weight, item.between_weight);
+            swap(within_cov, item.within_cov);
+            swap(within_count, item.within_count);
+            swap(within_weight, item.within_weight);
+        }
+
+        friend void deserialize (
+            discriminant_pca& item, 
+            std::istream& in
+        )
+        {
+            deserialize( item.total_cov, in);
+            deserialize( item.total_sum, in);
+            deserialize( item.total_count, in);
+            deserialize( item.vect_size, in);
+            deserialize( item.between_cov, in);
+            deserialize( item.between_count, in);
+            deserialize( item.between_weight, in);
+            deserialize( item.within_cov, in);
+            deserialize( item.within_count, in);
+            deserialize( item.within_weight, in);
+        }
+
+        friend void serialize (
+            const discriminant_pca& item, 
+            std::ostream& out 
+        )   
+        {
+            serialize( item.total_cov, out);
+            serialize( item.total_sum, out);
+            serialize( item.total_count, out);
+            serialize( item.vect_size, out);
+            serialize( item.between_cov, out);
+            serialize( item.between_count, out);
+            serialize( item.between_weight, out);
+            serialize( item.within_cov, out);
+            serialize( item.within_count, out);
+            serialize( item.within_weight, out);
+        }
+
+        discriminant_pca operator+ (
+            const discriminant_pca& item
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) &&
+                         between_class_weight() == item.between_class_weight() &&
+                         within_class_weight() == item.within_class_weight(),
+                "\t discriminant_pca discriminant_pca::operator+()"
+                << "\n\t The two discriminant_pca objects being added must have compatible parameters"
+                << "\n\t in_vector_size():            " << in_vector_size() 
+                << "\n\t item.in_vector_size():       " << item.in_vector_size() 
+                << "\n\t between_class_weight():      " << between_class_weight() 
+                << "\n\t item.between_class_weight(): " << item.between_class_weight() 
+                << "\n\t within_class_weight():       " << within_class_weight() 
+                << "\n\t item.within_class_weight():  " << item.within_class_weight() 
+                << "\n\t this:                        " << this
+                );
+
+            discriminant_pca temp(item);
+
+            // We need to make sure to ignore empty matrices.  That's what these if statements
+            // are for.
+
+            if (total_count != 0 && temp.total_count != 0)
+            {
+                temp.total_cov += total_cov;
+                temp.total_sum += total_sum;
+                temp.total_count += total_count;
+            }
+            else if (total_count != 0)
+            {
+                temp.total_cov = total_cov;
+                temp.total_sum = total_sum;
+                temp.total_count = total_count;
+            }
+
+            if (between_count != 0 && temp.between_count != 0)
+            {
+                temp.between_cov += between_cov;
+                temp.between_count += between_count;
+            }
+            else if (between_count != 0)
+            {
+                temp.between_cov = between_cov;
+                temp.between_count = between_count;
+            }
+
+            if (within_count != 0 && temp.within_count != 0)
+            {
+                temp.within_cov += within_cov;
+                temp.within_count += within_count;
+            }
+            else if (within_count != 0)
+            {
+                temp.within_cov = within_cov;
+                temp.within_count = within_count;
+            }
+
+            return temp;
+        }
+
+        discriminant_pca& operator+= (
+            const discriminant_pca& rhs
+        )
+        {
+            (*this + rhs).swap(*this);
+            return *this;
+        }
+
+    private:
+
+        void compute_dpca_matrix (
+            general_matrix& dpca_mat,
+            general_matrix& eigenvalues,
+            const double eps,
+            long num_rows 
+        ) const
+        {
+            general_matrix cov;
+
+            // now combine the three measures of variance into a single matrix by using the
+            // within_weight and between_weight weights.
+            cov = get_total_covariance_matrix();
+            if (within_count != 0)
+                cov -= within_weight*within_cov/within_count; 
+            if (between_count != 0)
+                cov += between_weight*between_cov/between_count; 
+
+
+            eigenvalue_decomposition<general_matrix> eig(make_symmetric(cov));
+
+            eigenvalues = eig.get_real_eigenvalues();
+            dpca_mat = eig.get_pseudo_v();
+
+            // sort the eigenvalues and eigenvectors so that the biggest eigenvalues come first
+            rsort_columns(dpca_mat, eigenvalues);
+
+            long num_vectors = 0;
+            if (num_rows == 0)
+            {
+                // Some of the eigenvalues might be negative.  So first lets zero those out
+                // so they won't get considered.
+                eigenvalues = pointwise_multiply(eigenvalues > 0, eigenvalues);
+                // figure out how many eigenvectors we want in our dpca matrix
+                const double thresh = sum(eigenvalues)*eps;
+                double total = 0;
+                for (long r = 0; r < eigenvalues.size() && total < thresh; ++r)
+                {
+                    // Don't even think about looking at eigenvalues that are 0.  If we go this
+                    // far then we have all we need.
+                    if (eigenvalues(r) == 0)
+                        break;
+
+                    ++num_vectors;
+                    total += eigenvalues(r);
+                }
+
+                if (num_vectors == 0)
+                    throw discriminant_pca_error("While performing discriminant_pca, all eigenvalues were negative or 0");
+            }
+            else
+            {
+                num_vectors = num_rows;
+            }
+
+
+            // So now we know we want to use num_vectors of the first eigenvectors.  So
+            // pull those out and discard the rest.
+            dpca_mat = trans(colm(dpca_mat,range(0,num_vectors-1)));
+
+            // also clip off the eigenvalues we aren't using
+            eigenvalues = rowm(eigenvalues, range(0,num_vectors-1));
+
+        }
+
+        general_matrix get_total_covariance_matrix (
+        ) const
+        /*!
+            ensures
+                - returns the covariance matrix of all the data given to the add_to_total_variance()
+        !*/
+        {
+            // if we don't even know the dimensionality of the vectors we are dealing
+            // with then just return an empty matrix
+            if (vect_size == 0)
+                return general_matrix();
+
+            // we know the vector size but we have zero total covariance.  
+            if (total_count == 0)
+            {
+                general_matrix temp(vect_size,vect_size);
+                temp = 0;
+                return temp;
+            }
+
+            // In this case we actually have something to make a total covariance matrix out of. 
+            // So do that.
+            column_matrix avg = total_sum/total_count;
+
+            return total_cov/total_count - avg*trans(avg);
+        }
+
+        general_matrix total_cov;
+        column_matrix total_sum;
+        scalar_type total_count;
+
+        long vect_size;
+
+        general_matrix between_cov;
+        scalar_type between_count;
+        scalar_type between_weight;
+
+        general_matrix within_cov;
+        scalar_type within_count;
+        scalar_type within_weight;
+    };
+
+    template <
+        typename matrix_type
+        >
+    inline void swap (
+        discriminant_pca<matrix_type>& a, 
+        discriminant_pca<matrix_type>& b 
+    ) { a.swap(b); }   
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DPCA_h_
+
+
diff --git a/ml/dlib/dlib/statistics/dpca_abstract.h b/ml/dlib/dlib/statistics/dpca_abstract.h
new file mode 100644
index 000000000..d9eef635b
--- /dev/null
+++ b/ml/dlib/dlib/statistics/dpca_abstract.h
@@ -0,0 +1,365 @@
+// Copyright (C) 2009  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_DPCA_ABSTRaCT_
+#ifdef DLIB_DPCA_ABSTRaCT_
+
+#include <limits>
+#include <cmath>
+#include "../matrix/matrix_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class discriminant_pca
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                Must be some type of dlib::matrix.
+
+            INITIAL VALUE
+                - in_vector_size() == 0
+                - between_class_weight() == 1
+                - within_class_weight() == 1
+
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the Discriminant PCA technique described in the paper:
+                    A New Discriminant Principal Component Analysis Method with Partial Supervision (2009)
+                    by Dan Sun and Daoqiang Zhang
+
+                This algorithm is basically a straightforward generalization of the classical PCA
+                technique to handle partially labeled data.  It is useful if you want to learn a linear
+                dimensionality reduction rule using a bunch of data that is partially labeled.  
+                
+                It functions by estimating three different scatter matrices.  The first is the total scatter 
+                matrix St (i.e. the total data covariance matrix), the second is the between class scatter 
+                matrix Sb (basically a measure of the variance between data of different classes) and the 
+                third is the within class scatter matrix Sw (a measure of the variance of data within the 
+                same classes).  
+
+                Once these three matrices are estimated they are combined according to the following equation:
+                   S = St + a*Sb - b*Sw
+                Where a and b are user supplied weights.  Then the largest eigenvalues of the S matrix are 
+                computed and their associated eigenvectors are returned as the output of this algorithm.  
+                That is, the desired linear dimensionality reduction is given by the matrix with these 
+                eigenvectors stored in its rows.
+
+                Note that if a and b are set to 0 (or no labeled data is provided) then the output transformation
+                matrix is the same as the one produced by the classical PCA algorithm.
+        !*/
+
+    public:
+
+        struct discriminant_pca_error : public error;
+        /*!
+            This exception is thrown if there is some error that prevents us from creating
+            a DPCA matrix.
+        !*/
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        discriminant_pca (
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        void clear(
+        );
+        /*!
+            ensures
+                - #*this has its initial value
+        !*/
+
+        long in_vector_size (
+        ) const;
+        /*!
+            ensures
+                - if (this object has been presented with any input vectors) then
+                    - returns the dimension of the column vectors used with this object
+                - else
+                    - returns 0
+        !*/
+
+        void set_within_class_weight (
+            scalar_type weight
+        );
+        /*!
+            requires
+                - weight >= 0
+            ensures
+                - #within_class_weight() == weight
+        !*/
+
+        scalar_type within_class_weight (
+        ) const;
+        /*!
+            ensures
+                - returns the weight used when combining the within class scatter matrix with
+                  the other scatter matrices.  
+        !*/
+
+        void set_between_class_weight (
+            scalar_type weight
+        );
+        /*!
+            requires
+                - weight >= 0
+            ensures
+                - #between_class_weight() == weight
+        !*/
+
+        scalar_type between_class_weight (
+        ) const;
+        /*!
+            ensures
+                - returns the weight used when combining the between class scatter matrix with
+                  the other scatter matrices.  
+        !*/
+
+        void add_to_within_class_variance(
+            const matrix_exp& x,
+            const matrix_exp& y
+        );
+        /*!
+            requires
+                - is_col_vector(x) == true
+                - is_col_vector(y) == true
+                - x.size() == y.size()
+                - if (in_vector_size() != 0) then
+                    - x.size() == y.size() == in_vector_size()
+            ensures
+                - #in_vector_size() == x.size()
+                - Adds (x-y)*trans(x-y) to the within class scatter matrix.
+                  (i.e. the direction given by (x-y) is recorded as being a direction associated
+                  with within class variance and is therefore unimportant and will be weighted
+                  less in the final dimensionality reduction)
+        !*/
+
+        void add_to_between_class_variance(
+            const matrix_exp& x,
+            const matrix_exp& y
+        );
+        /*!
+            requires
+                - is_col_vector(x) == true
+                - is_col_vector(y) == true
+                - x.size() == y.size()
+                - if (in_vector_size() != 0) then
+                    - x.size() == y.size() == in_vector_size()
+            ensures
+                - #in_vector_size() == x.size()
+                - Adds (x-y)*trans(x-y) to the between class scatter matrix.
+                  (i.e. the direction given by (x-y) is recorded as being a direction associated
+                  with between class variance and is therefore important and will be weighted
+                  higher in the final dimensionality reduction)
+        !*/
+
+        void add_to_total_variance(
+            const matrix_exp& x
+        );
+        /*!
+            requires
+                - is_col_vector(x) == true
+                - if (in_vector_size() != 0) then
+                    - x.size() == in_vector_size()
+            ensures
+                - #in_vector_size() == x.size()
+                - let M denote the centroid (or mean) of all the data.  Then this function 
+                  Adds (x-M)*trans(x-M) to the total scatter matrix.
+                  (i.e. the direction given by (x-M) is recorded as being a direction associated
+                  with unlabeled variance and is therefore of default importance and will be weighted
+                  as described in the discriminant_pca class description.)
+        !*/
+
+        const general_matrix dpca_matrix (
+            const double eps = 0.99
+        ) const;
+        /*!
+            requires
+                - 0 < eps <= 1
+                - in_vector_size() != 0
+                  (i.e. you have to have given this object some data)
+            ensures
+                - computes and returns the matrix MAT given by dpca_matrix(MAT,eigen,eps).  
+                  That is, this function returns the dpca_matrix computed by the function
+                  defined below.  
+                - Note that MAT is the desired linear transformation matrix.  That is, 
+                  multiplying a vector by MAT performs the desired linear dimensionality reduction.
+            throws
+                - discriminant_pca_error
+                    This exception is thrown if we are unable to create the dpca_matrix for some 
+                    reason.  For example, if only within class examples have been given or
+                    within_class_weight() is very large then all eigenvalues will be negative and
+                    that prevents this algorithm from working properly.
+        !*/
+
+        void dpca_matrix (
+            general_matrix& dpca_mat,
+            general_matrix& eigenvalues,
+            const double eps = 0.99
+        ) const;
+        /*!
+            requires
+                - 0 < eps <= 1
+                - in_vector_size() != 0
+                  (i.e. you have to have given this object some data)
+            ensures
+                - is_col_vector(#eigenvalues) == true
+                - #dpca_mat.nr() == eigenvalues.size() 
+                - #dpca_mat.nc() == in_vector_size()
+                - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
+                  in the class description and its eigenvalue is given by eigenvalues(i).
+                - all values in #eigenvalues are > 0.  Moreover, the eigenvalues are in
+                  sorted order with the largest eigenvalue stored at eigenvalues(0).
+                - (#dpca_mat)*trans(#dpca_mat) == identity_matrix.  
+                  (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
+                  orthogonal)
+                - Note that #dpca_mat is the desired linear transformation matrix.  That is, 
+                  multiplying a vector by #dpca_mat performs the desired linear dimensionality 
+                  reduction.
+                - sum(#eigenvalues) will be equal to about eps times the total sum of all 
+                  positive eigenvalues in the S matrix described in this class's description.
+                  This means that eps is a number that controls how "lossy" the dimensionality
+                  reduction will be.  Large values of eps result in more output dimensions 
+                  while smaller values result in fewer. 
+            throws
+                - discriminant_pca_error
+                    This exception is thrown if we are unable to create the dpca_matrix for some 
+                    reason.  For example, if only within class examples have been given or
+                    within_class_weight() is very large then all eigenvalues will be negative and
+                    that prevents this algorithm from working properly.
+        !*/
+
+        const general_matrix dpca_matrix_of_size (
+            const long num_rows 
+        );
+        /*!
+            requires
+                - 0 < num_rows <= in_vector_size()
+            ensures
+                - computes and returns the matrix MAT given by dpca_matrix_of_size(MAT,eigen,num_rows).  
+                  That is, this function returns the dpca_matrix computed by the function
+                  defined below.  
+                - Note that MAT is the desired linear transformation matrix.  That is,
+                  multiplying a vector by MAT performs the desired linear dimensionality
+                  reduction to num_rows dimensions.
+        !*/
+
+        void dpca_matrix_of_size (
+            general_matrix& dpca_mat,
+            general_matrix& eigenvalues,
+            const long num_rows 
+        );
+        /*!
+            requires
+                - 0 < num_rows <= in_vector_size()
+            ensures
+                - is_col_vector(#eigenvalues) == true
+                - #dpca_mat.nr() == eigenvalues.size() 
+                - #dpca_mat.nr() == num_rows 
+                - #dpca_mat.nc() == in_vector_size()
+                - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described
+                  in the class description and its eigenvalue is given by eigenvalues(i).
+                - The values in #eigenvalues might be positive or negative.  Additionally, the
+                  eigenvalues are in sorted order with the largest eigenvalue stored at
+                  eigenvalues(0).
+                - (#dpca_mat)*trans(#dpca_mat) == identity_matrix.  
+                  (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually
+                  orthogonal)
+                - Note that #dpca_mat is the desired linear transformation matrix.  That is, 
+                  multiplying a vector by #dpca_mat performs the desired linear dimensionality 
+                  reduction to num_rows dimensions.
+        !*/
+
+        discriminant_pca operator+ (
+            const discriminant_pca& item
+        ) const;
+        /*!
+            requires
+                - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()
+                  (i.e. the in_vector_size() of *this and item must match or one must be zero)
+                - between_class_weight() == item.between_class_weight()
+                - within_class_weight() == item.within_class_weight()
+            ensures
+                - returns a new discriminant_pca object that represents the combination of all 
+                  the measurements given to *this and item.  That is, this function returns a
+                  discriminant_pca object, R, that is equivalent to what you would obtain if all
+                  modifying calls (e.g. the add_to_*() functions) to *this and item had instead 
+                  been done to R.
+        !*/
+
+        discriminant_pca& operator+= (
+            const discriminant_pca& rhs
+        );
+        /*!
+            requires
+                - in_vector_size() == 0 || rhs.in_vector_size() == 0 || in_vector_size() == rhs.in_vector_size()
+                  (i.e. the in_vector_size() of *this and rhs must match or one must be zero)
+                - between_class_weight() == rhs.between_class_weight()
+                - within_class_weight() == rhs.within_class_weight()
+            ensures
+                - #*this == *item + rhs
+                - returns #*this
+        !*/
+
+        void swap (
+            discriminant_pca& item
+        );
+        /*!
+            ensures
+                - swaps *this and item
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    inline void swap (
+        discriminant_pca<matrix_type>& a, 
+        discriminant_pca<matrix_type>& b 
+    ) { a.swap(b); }   
+    /*!
+        provides a global swap function
+    !*/
+
+    template <
+        typename matrix_type,
+        >
+    void deserialize (
+        discriminant_pca<matrix_type>& item, 
+        std::istream& in
+    );   
+    /*!
+        provides deserialization support 
+    !*/
+
+    template <
+        typename matrix_type,
+        >
+    void serialize (
+        const discriminant_pca<matrix_type>& item, 
+        std::ostream& out 
+    );   
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_DPCA_ABSTRaCT_
+
diff --git a/ml/dlib/dlib/statistics/image_feature_sampling.h b/ml/dlib/dlib/statistics/image_feature_sampling.h
new file mode 100644
index 000000000..f04f9926e
--- /dev/null
+++ b/ml/dlib/dlib/statistics/image_feature_sampling.h
@@ -0,0 +1,82 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_IMAGE_FEATURE_SaMPLING_Hh_
+#define DLIB_IMAGE_FEATURE_SaMPLING_Hh_
+
+#include "image_feature_sampling_abstract.h"
+#include "../statistics.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_array_type,
+        typename feature_extractor_type,
+        typename pyramid_type
+        >
+    random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features (
+        const image_array_type& images,
+        const pyramid_type& pyr,
+        const feature_extractor_type& fe_,
+        unsigned long num
+    )
+    {
+        feature_extractor_type fe;
+        fe.copy_configuration(fe_);
+        random_subset_selector<typename feature_extractor_type::descriptor_type> basis;
+        basis.set_max_size(num);
+
+        typedef typename image_array_type::type image_type;
+        image_type temp_img, temp_img2;
+
+        for (unsigned long i = 0; i < images.size(); ++i)
+        {
+            bool at_pyramid_top = true;
+            while (true)
+            {
+                if (at_pyramid_top)
+                    fe.load(images[i]);
+                else
+                    fe.load(temp_img);
+                
+                if (fe.size() == 0)
+                    break;
+
+                for (long r = 0; r < fe.nr(); ++r)
+                {
+                    for (long c = 0; c < fe.nc(); ++c)
+                    {
+                        if (basis.next_add_accepts())
+                        {
+                            basis.add(fe(r,c));
+                        }
+                        else
+                        {
+                            basis.add();
+                        }
+                    }
+                }
+
+                if (at_pyramid_top)
+                {
+                    at_pyramid_top = false;
+                    pyr(images[i], temp_img);
+                }
+                else
+                {
+                    pyr(temp_img, temp_img2);
+                    swap(temp_img2,temp_img);
+                }
+            }
+        }
+        return basis;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_IMAGE_FEATURE_SaMPLING_Hh_
+
diff --git a/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h b/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h
new file mode 100644
index 000000000..b51ef5423
--- /dev/null
+++ b/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2011  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
+#ifdef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
+
+#include "random_subset_selector_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename image_array_type,
+        typename feature_extractor_type,
+        typename pyramid_type
+        >
+    random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features (
+        const image_array_type& images,
+        const pyramid_type& pyr,
+        const feature_extractor_type& fe,
+        unsigned long num
+    );
+    /*!
+        requires
+            - pyramid_type == a type compatible with the image pyramid objects defined 
+              in dlib/image_transforms/image_pyramid_abstract.h
+            - feature_extractor_type == a local image feature extractor type such as the
+              dlib::hog_image
+            - image_array_type == an implementation of dlib/array/array_kernel_abstract.h
+              and it must contain image objects which can be passed to pyr() and fe.load()
+              and are swappable by global swap().
+        ensures
+            - creates an image pyramid for each image in images and performs feature
+              extraction on each pyramid level.  Then selects a random subsample of at 
+              most num local feature vectors and returns it.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/lda.h b/ml/dlib/dlib/statistics/lda.h
new file mode 100644
index 000000000..38de3fd1e
--- /dev/null
+++ b/ml/dlib/dlib/statistics/lda.h
@@ -0,0 +1,237 @@
+// Copyright (C) 2014  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_LDA_Hh_
+#define DLIB_LDA_Hh_
+
+#include "lda_abstract.h"
+#include "../algs.h"
+#include <map>
+#include "../matrix.h"
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    namespace impl
+    {
+
+        inline std::map<unsigned long,unsigned long> make_class_labels(
+            const std::vector<unsigned long>& row_labels
+        )
+        {
+            std::map<unsigned long,unsigned long> class_labels;
+            for (unsigned long i = 0; i < row_labels.size(); ++i)
+            {
+                const unsigned long next = class_labels.size();
+                if (class_labels.count(row_labels[i]) == 0)
+                    class_labels[row_labels[i]] = next;
+            }
+            return class_labels;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <
+            typename T
+            >
+        matrix<T,0,1> center_matrix (
+            matrix<T>& X
+        )
+        {
+            matrix<T,1> mean;
+            for (long r = 0; r < X.nr(); ++r)
+                mean += rowm(X,r);
+            mean /= X.nr();
+
+            for (long r = 0; r < X.nr(); ++r)
+                set_rowm(X,r) -= mean;
+
+            return trans(mean);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    void compute_lda_transform (
+        matrix<T>& X,
+        matrix<T,0,1>& mean,
+        const std::vector<unsigned long>& row_labels,
+        unsigned long lda_dims = 500,
+        unsigned long extra_pca_dims = 200
+    )
+    {
+        std::map<unsigned long,unsigned long> class_labels = impl::make_class_labels(row_labels);
+        // LDA can only give out at most class_labels.size()-1 dimensions so don't try to
+        // compute more than that.
+        lda_dims = std::min<unsigned long>(lda_dims, class_labels.size()-1);
+
+        // make sure requires clause is not broken
+        DLIB_CASSERT(class_labels.size() > 1,
+            "\t void compute_lda_transform()"
+            << "\n\t You can't call this function if the number of distinct class labels is less than 2."
+            );
+        DLIB_CASSERT(X.size() != 0 && (long)row_labels.size() == X.nr() && lda_dims != 0,
+            "\t void compute_lda_transform()"
+            << "\n\t Invalid inputs were given to this function."
+            << "\n\t X.size():          " << X.size()
+            << "\n\t row_labels.size(): " << row_labels.size()
+            << "\n\t lda_dims:          " << lda_dims
+            );
+
+
+        mean = impl::center_matrix(X);
+        // Do PCA to reduce dims
+        matrix<T> pu,pw,pv;
+        svd_fast(X, pu, pw, pv, lda_dims+extra_pca_dims, 4);
+        pu.set_size(0,0); // free RAM, we don't need pu.
+        X = X*pv;
+
+
+        matrix<T> class_means(class_labels.size(), X.nc());
+        class_means = 0;
+        matrix<T,0,1> class_counts(class_labels.size());
+        class_counts = 0;
+
+        // First compute the means of each class
+        for (unsigned long i = 0; i < row_labels.size(); ++i)
+        {
+            const unsigned long class_idx = class_labels[row_labels[i]];
+            set_rowm(class_means,class_idx) += rowm(X,i);
+            class_counts(class_idx)++;
+        }
+        class_means = inv(diagm(class_counts))*class_means;
+        // subtract means from the data
+        for (unsigned long i = 0; i < row_labels.size(); ++i)
+        {
+            const unsigned long class_idx = class_labels[row_labels[i]];
+            set_rowm(X,i) -= rowm(class_means,class_idx);
+        }
+
+        // Note that we are using the formulas from the paper Using Discriminant
+        // Eigenfeatures for Image Retrieval by Swets and Weng.
+        matrix<T> Sw = trans(X)*X;
+        matrix<T> Sb = trans(class_means)*class_means;
+        matrix<T> A, H;
+        matrix<T,0,1> W;
+        svd3(Sw, A, W, H);
+        W = sqrt(W);
+        W = reciprocal(lowerbound(W,max(W)*1e-5));
+        A = trans(H*diagm(W))*Sb*H*diagm(W);
+        matrix<T> v,s,u;
+        svd3(A, v, s, u);
+        matrix<T> tform = H*diagm(W)*u;
+        // pick out only the number of dimensions we are supposed to for the output, unless
+        // we should just keep them all, then don't do anything. 
+        if ((long)lda_dims <= tform.nc())
+        {
+            rsort_columns(tform, s);
+            tform = colm(tform, range(0, lda_dims-1));
+        }
+
+        X = trans(pv*tform);
+        mean = X*mean;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline std::pair<double,double> equal_error_rate (
+        const std::vector<double>& low_vals,
+        const std::vector<double>& high_vals 
+    )
+    {
+        std::vector<std::pair<double,int> > temp;
+        temp.reserve(low_vals.size()+high_vals.size());
+        for (unsigned long i = 0; i < low_vals.size(); ++i)
+            temp.push_back(std::make_pair(low_vals[i], -1));
+        for (unsigned long i = 0; i < high_vals.size(); ++i)
+            temp.push_back(std::make_pair(high_vals[i], +1));
+
+        std::sort(temp.begin(), temp.end());
+
+        if (temp.size() == 0)
+            return std::make_pair(0,0);
+
+        double thresh = temp[0].first;
+
+        unsigned long num_low_wrong = low_vals.size();
+        unsigned long num_high_wrong = 0;
+        double low_error = num_low_wrong/(double)low_vals.size();
+        double high_error = num_high_wrong/(double)high_vals.size();
+        for (unsigned long i = 0; i < temp.size() && high_error < low_error; ++i)
+        {
+            thresh = temp[i].first;
+            if (temp[i].second > 0)
+            {
+                num_high_wrong++;
+                high_error = num_high_wrong/(double)high_vals.size();
+            }
+            else
+            {
+                num_low_wrong--;
+                low_error = num_low_wrong/(double)low_vals.size();
+            }
+        }
+
+        return std::make_pair((low_error+high_error)/2, thresh);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    struct roc_point
+    {
+        double true_positive_rate;
+        double false_positive_rate;
+        double detection_threshold;
+    };
+
+    inline std::vector<roc_point> compute_roc_curve (
+        const std::vector<double>& true_detections,
+        const std::vector<double>& false_detections 
+    )
+    {
+        DLIB_CASSERT(true_detections.size() != 0);
+        DLIB_CASSERT(false_detections.size() != 0);
+
+        std::vector<std::pair<double,int> > temp;
+        temp.reserve(true_detections.size()+false_detections.size());
+        for (unsigned long i = 0; i < true_detections.size(); ++i)
+            temp.push_back(std::make_pair(true_detections[i], +1));
+        for (unsigned long i = 0; i < false_detections.size(); ++i)
+            temp.push_back(std::make_pair(false_detections[i], -1));
+
+        std::sort(temp.rbegin(), temp.rend());
+
+
+        std::vector<roc_point> roc_curve;
+        roc_curve.reserve(temp.size());
+
+        double num_false_included = 0;
+        double num_true_included = 0;
+        for (unsigned long i = 0; i < temp.size(); ++i)
+        {
+            if (temp[i].second > 0)
+                num_true_included++;
+            else
+                num_false_included++;
+
+            roc_point p;
+            p.true_positive_rate = num_true_included/true_detections.size();
+            p.false_positive_rate = num_false_included/false_detections.size();
+            p.detection_threshold = temp[i].first;
+            roc_curve.push_back(p);
+        }
+
+        return roc_curve;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LDA_Hh_
+
diff --git a/ml/dlib/dlib/statistics/lda_abstract.h b/ml/dlib/dlib/statistics/lda_abstract.h
new file mode 100644
index 000000000..ab9fd7a32
--- /dev/null
+++ b/ml/dlib/dlib/statistics/lda_abstract.h
@@ -0,0 +1,118 @@
+// Copyright (C) 2014  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_LDA_ABSTRACT_Hh_
+#ifdef DLIB_LDA_ABSTRACT_Hh_
+
+#include <map>
+#include "../matrix.h"
+#include <vector>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    void compute_lda_transform (
+        matrix<T>& X,
+        matrix<T,0,1>& M,
+        const std::vector<unsigned long>& row_labels,
+        unsigned long lda_dims = 500,
+        unsigned long extra_pca_dims = 200
+    );
+    /*!
+        requires
+            - X.size() != 0
+            - row_labels.size() == X.nr()
+            - The number of distinct values in row_labels > 1
+            - lda_dims != 0
+        ensures
+            - We interpret X as a collection X.nr() of input vectors, where each row of X
+              is one of the vectors.
+            - We interpret row_labels[i] as the label of the vector rowm(X,i).
+            - This function performs the dimensionality reducing version of linear
+              discriminant analysis.  That is, you give it a set of labeled vectors and it
+              returns a linear transform that maps the input vectors into a new space that
+              is good for distinguishing between the different classes.  In particular,
+              this function finds matrices Z and M such that:
+                - Given an input vector x, Z*x-M, is the transformed version of x.  That is,
+                  Z*x-M maps x into a space where x vectors that share the same class label
+                  are near each other. 
+                - Z*x-M results in the transformed vectors having zero expected mean.
+                - Z.nr() <= lda_dims
+                  (it might be less than lda_dims if there are not enough distinct class
+                  labels to support lda_dims dimensions).
+                - Z.nc() == X.nc()
+                - We overwrite the input matrix X and store Z in it.  Therefore, the
+                  outputs of this function are in X and M.
+            - In order to deal with very high dimensional inputs, we perform PCA internally
+              to map the input vectors into a space of at most lda_dims+extra_pca_dims
+              prior to performing LDA.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    std::pair<double,double> equal_error_rate (
+        const std::vector<double>& low_vals,
+        const std::vector<double>& high_vals 
+    );
+    /*!
+        ensures
+            - This function finds a threshold T that best separates the elements of
+              low_vals from high_vals by selecting the threshold with equal error rate.  In
+              particular, we try to pick a threshold T such that:
+                - for all valid i:
+                    - high_vals[i] >= T
+                - for all valid i:
+                    - low_vals[i] < T
+              Where the best T is determined such that the fraction of low_vals >= T is the
+              same as the fraction of high_vals < T.
+            - Let ERR == the equal error rate.  I.e. the fraction of times low_vals >= T
+              and high_vals < T.  Note that 0 <= ERR <= 1.
+            - returns make_pair(ERR,T) 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    struct roc_point
+    {
+        double true_positive_rate;
+        double false_positive_rate;
+        double detection_threshold;
+    };
+
+    std::vector<roc_point> compute_roc_curve (
+        const std::vector<double>& true_detections,
+        const std::vector<double>& false_detections 
+    );
+    /*!
+        requires
+            - true_detections.size() != 0
+            - false_detections.size() != 0
+        ensures
+            - This function computes the ROC curve (receiver operating characteristic)
+              curve of the given data.  Therefore, we interpret true_detections as
+              containing detection scores for a bunch of true detections and
+              false_detections as detection scores from a bunch of false detections.  A
+              perfect detector would always give higher scores to true detections than to
+              false detections, resulting in a true positive rate of 1 and a false positive
+              rate of 0, for some appropriate detection threshold.
+            - Returns an array, ROC, such that:
+                - ROC.size() == true_detections.size()+false_detections.size()
+                - for all valid i:
+                    - If you were to accept all detections with a score >= ROC[i].detection_threshold 
+                      then you would obtain a true positive rate of ROC[i].true_positive_rate and a 
+                      false positive rate of ROC[i].false_positive_rate.
+                - ROC is ordered such that low detection rates come first.  That is, the
+                  curve is swept from a high detection threshold to a low threshold.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LDA_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/random_subset_selector.h b/ml/dlib/dlib/statistics/random_subset_selector.h
new file mode 100644
index 000000000..17492363d
--- /dev/null
+++ b/ml/dlib/dlib/statistics/random_subset_selector.h
@@ -0,0 +1,372 @@
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_RANDOM_SUBSeT_SELECTOR_H_
+#define DLIB_RANDOM_SUBSeT_SELECTOR_H_
+
+#include "random_subset_selector_abstract.h"
+#include "../rand.h"
+#include <vector>
+#include "../algs.h"
+#include "../string.h"
+#include "../serialize.h"
+#include "../matrix/matrix_mat.h"
+#include <iostream>
+
+namespace dlib
+{
+    template <
+        typename T,
+        typename Rand_type = dlib::rand
+        >
+    class random_subset_selector
+    {
+        /*!
+            INITIAL VALUE
+                - _max_size == 0
+                - items.size() == 0
+                - count == 0
+                - _next_add_accepts == false
+
+            CONVENTION
+                - count == the number of times add() has been called since the last
+                  time this object was empty.
+                - items.size() == size()
+                - max_size() == _max_size
+                - next_add_accepts() == _next_add_accepts
+        !*/
+    public:
+        typedef T type;
+        typedef T value_type;
+        typedef default_memory_manager mem_manager_type;
+        typedef Rand_type rand_type;
+
+        typedef typename std::vector<T>::iterator iterator;
+        typedef typename std::vector<T>::const_iterator const_iterator;
+
+
+        random_subset_selector (
+        )
+        {
+            _max_size = 0;
+            make_empty();
+        }
+
+        void set_seed(const std::string& value)
+        {
+            rnd.set_seed(value);
+        }
+
+        void make_empty (
+        )
+        {
+            items.resize(0);
+            count = 0;
+            update_next_add_accepts();
+        }
+
+        const std::vector<T>& to_std_vector(
+        ) const { return items; }
+
+        size_t size (
+        ) const 
+        {
+            return items.size();
+        }
+
+        void set_max_size (
+            unsigned long new_max_size
+        )
+        {
+            items.reserve(new_max_size);
+            make_empty();
+            _max_size = new_max_size;
+            update_next_add_accepts();
+        }
+
+        unsigned long max_size (
+        ) const
+        {
+            return _max_size;
+        }
+
+        T& operator[] (
+            unsigned long idx
+        ) 
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(idx < size(),
+                "\tvoid random_subset_selector::operator[]()"
+                << "\n\t idx is out of range"
+                << "\n\t idx:    " << idx 
+                << "\n\t size(): " << size() 
+                << "\n\t this:   " << this
+                );
+
+            return items[idx];
+        }
+
+        const T& operator[] (
+            unsigned long idx
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(idx < size(),
+                "\tvoid random_subset_selector::operator[]()"
+                << "\n\t idx is out of range"
+                << "\n\t idx:    " << idx 
+                << "\n\t size(): " << size() 
+                << "\n\t this:   " << this
+                );
+
+            return items[idx];
+        }
+
+        iterator                begin()                         { return items.begin(); }
+        const_iterator          begin() const                   { return items.begin(); }
+        iterator                end()                           { return items.end(); }
+        const_iterator          end() const                     { return items.end(); }
+
+        bool next_add_accepts (
+        ) const 
+        {
+            return _next_add_accepts;
+        }
+
+        void add (
+            const T& new_item
+        )
+        {
+            if (items.size() < _max_size)
+            {
+                items.push_back(new_item);
+                // swap into a random place
+                exchange(items[rnd.get_random_32bit_number()%items.size()], items.back());
+            }
+            else if (_next_add_accepts)
+            {
+                // pick a random element of items and replace it.
+                items[rnd.get_random_32bit_number()%items.size()] = new_item;
+            }
+
+            update_next_add_accepts();
+            ++count;
+        }
+
+        void add (
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(next_add_accepts() == false,
+                "\tvoid random_subset_selector::add()"
+                << "\n\t You should be calling the version of add() that takes an argument"
+                << "\n\t this: " << this
+                );
+
+            update_next_add_accepts();
+            ++count;
+        }
+
+        void swap (
+            random_subset_selector& a
+        )
+        {
+            items.swap(a.items);
+            std::swap(_max_size, a._max_size);
+            std::swap(count, a.count);
+            rnd.swap(a.rnd);
+            std::swap(_next_add_accepts, a._next_add_accepts);
+        }
+
+        template <typename T1, typename T2>
+        friend void serialize (
+            const random_subset_selector<T1,T2>& item,
+            std::ostream& out
+        );
+
+        template <typename T1, typename T2>
+        friend void deserialize (
+            random_subset_selector<T1,T2>& item,
+            std::istream& in 
+        );
+
+    private:
+
+        void update_next_add_accepts (
+        )
+        {
+            if (items.size() < _max_size)
+            {
+                _next_add_accepts = true;
+            }
+            else if (_max_size == 0)
+            {
+                _next_add_accepts = false;
+            }
+            else
+            {
+                // At this point each element of items has had an equal chance of being in this object.   
+                // In particular, the probability that each arrived here is currently items.size()/count.    
+                // We need to be able to say that, after this function ends, the probability of any 
+                // particular object ending up in items is items.size()/(count+1).  So this means that 
+                // we should decide to add a new item into items with this probability.  Also, if we do 
+                // so then we pick one of the current items and replace it at random with the new item.
+
+                // Make me a random 64 bit number.   This might seem excessive but I want this object
+                // to be able to handle an effectively infinite number of calls to add().  So count
+                // might get very large and we need to deal with that properly.
+                const unsigned long num1 = rnd.get_random_32bit_number();
+                const unsigned long num2 = rnd.get_random_32bit_number();
+                uint64 num = num1;
+                num <<= 32;
+                num |= num2;
+
+                num %= (count+1);
+
+                _next_add_accepts = (num < items.size());
+            }
+
+        }
+
+        std::vector<T> items;
+        unsigned long _max_size;
+        uint64 count; 
+
+        rand_type rnd;
+
+        bool _next_add_accepts;
+
+    };
+
+    template <
+        typename T,
+        typename rand_type 
+        >
+    void swap (
+        random_subset_selector<T,rand_type>& a,
+        random_subset_selector<T,rand_type>& b
+    ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename T1, typename T2>
+    void serialize (
+        const random_subset_selector<T1,T2>& item,
+        std::ostream& out
+    )
+    {
+        serialize(item.items, out);
+        serialize(item._max_size, out);
+        serialize(item.count, out);
+        serialize(item.rnd, out);
+        serialize(item._next_add_accepts, out);
+    }
+
+    template <typename T1, typename T2>
+    void deserialize (
+        random_subset_selector<T1,T2>& item,
+        std::istream& in 
+    )
+    {
+        deserialize(item.items, in);
+        deserialize(item._max_size, in);
+        deserialize(item.count, in);
+        deserialize(item.rnd, in);
+        deserialize(item._next_add_accepts, in);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename alloc
+        >
+    random_subset_selector<T> randomly_subsample (
+        const std::vector<T,alloc>& samples,
+        unsigned long num
+    )
+    {
+        random_subset_selector<T> subset;
+        subset.set_max_size(num);
+        for (unsigned long i = 0; i < samples.size(); ++i)
+            subset.add(samples[i]);
+        return subset;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename alloc,
+        typename U
+        >
+    random_subset_selector<T> randomly_subsample (
+        const std::vector<T,alloc>& samples,
+        unsigned long num,
+        const U& random_seed
+    )
+    {
+        random_subset_selector<T> subset;
+        subset.set_seed(cast_to_string(random_seed));
+        subset.set_max_size(num);
+        for (unsigned long i = 0; i < samples.size(); ++i)
+            subset.add(samples[i]);
+        return subset;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    random_subset_selector<T> randomly_subsample (
+        const random_subset_selector<T>& samples,
+        unsigned long num
+    )
+    {
+        random_subset_selector<T> subset;
+        subset.set_max_size(num);
+        for (unsigned long i = 0; i < samples.size(); ++i)
+            subset.add(samples[i]);
+        return subset;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename U
+        >
+    random_subset_selector<T> randomly_subsample (
+        const random_subset_selector<T>& samples,
+        unsigned long num,
+        const U& random_seed
+    )
+    {
+        random_subset_selector<T> subset;
+        subset.set_seed(cast_to_string(random_seed));
+        subset.set_max_size(num);
+        for (unsigned long i = 0; i < samples.size(); ++i)
+            subset.add(samples[i]);
+        return subset;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    const matrix_op<op_array_to_mat<random_subset_selector<T> > > mat (
+        const random_subset_selector<T>& m 
+    )
+    {
+        typedef op_array_to_mat<random_subset_selector<T> > op;
+        return matrix_op<op>(op(m));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RANDOM_SUBSeT_SELECTOR_H_
+
+
diff --git a/ml/dlib/dlib/statistics/random_subset_selector_abstract.h b/ml/dlib/dlib/statistics/random_subset_selector_abstract.h
new file mode 100644
index 000000000..96f8b545d
--- /dev/null
+++ b/ml/dlib/dlib/statistics/random_subset_selector_abstract.h
@@ -0,0 +1,388 @@
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
+#ifdef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
+
+#include <vector>
+#include "../rand/rand_kernel_abstract.h"
+#include "../algs.h"
+#include "../string.h"
+
+namespace dlib
+{
+    template <
+        typename T,
+        typename Rand_type = dlib::rand
+        >
+    class random_subset_selector
+    {
+        /*!
+            REQUIREMENTS ON T
+                T must be a copyable type
+
+            REQUIREMENTS ON Rand_type
+                must be an implementation of dlib/rand/rand_kernel_abstract.h
+
+            INITIAL VALUE
+                - size() == 0
+                - max_size() == 0
+                - next_add_accepts() == false
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool to help you select a random subset of a large body of data.  
+                In particular, it is useful when the body of data is too large to fit into memory.
+
+                So for example, suppose you have 1000000 data samples and you want to select a
+                random subset of size 1000.   Then you could do that as follows:
+                    
+                    random_subset_selector<sample_type> rand_subset;
+                    rand_subset.set_max_size(1000)
+                    for (int i = 0; i < 1000000; ++i)
+                        rand_subset.add( get_next_data_sample());
+
+              
+                At the end of the for loop you will have your random subset of 1000 samples.  And by
+                random I mean that each of the 1000000 data samples has an equal chance of ending
+                up in the rand_subset object.
+
+
+                Note that the above example calls get_next_data_sample() for each data sample.  This 
+                may be inefficient since most of the data samples are just ignored.  An alternative 
+                method that doesn't require you to load each sample can also be used.  Consider the 
+                following:
+
+                    random_subset_selector<sample_type> rand_subset;
+                    rand_subset.set_max_size(1000)
+                    for (int i = 0; i < 1000000; ++i)
+                        if (rand_subset.next_add_accepts())
+                            rand_subset.add(get_data_sample(i));
+                        else
+                            rand_subset.add() 
+
+                In the above example we only actually fetch the data sample into memory if we
+                know that the rand_subset would include it into the random subset.  Otherwise,
+                we can just call the empty add().
+                
+
+                Finally, note that the random_subset_selector uses a deterministic pseudo-random
+                number generator under the hood.  Moreover, the default constructor always seeds
+                the random number generator in the same way.  So unless you call set_seed() 
+                each instance of the random_subset_selector will function identically.
+        !*/
+    public:
+        typedef T type;
+        typedef T value_type;
+        typedef default_memory_manager mem_manager_type;
+        typedef Rand_type rand_type;
+
+        typedef typename std::vector<T>::iterator iterator;
+        typedef typename std::vector<T>::const_iterator const_iterator;
+
+        random_subset_selector (
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        void set_seed(
+            const std::string& value
+        );
+        /*!
+            ensures
+                - sets the seed of the random number generator that is embedded in
+                  this object to the given value.
+        !*/
+
+        void make_empty (
+        );
+        /*!
+            ensures
+                - #size() == 0
+        !*/
+
+        size_t size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of items of type T currently contained in this object
+        !*/
+
+        void set_max_size (
+            unsigned long new_max_size
+        );
+        /*!
+            ensures
+                - #max_size() == new_max_size
+                - #size() == 0
+        !*/
+
+        unsigned long max_size (
+        ) const;
+        /*!
+            ensures
+                - returns the maximum allowable size for this object
+        !*/
+
+        T& operator[] (
+            unsigned long idx
+        );
+        /*!
+            requires
+                - idx < size()
+            ensures
+                - returns a non-const reference to the idx'th element of this object
+        !*/
+
+        const T& operator[] (
+            unsigned long idx
+        ) const;
+        /*!
+            requires
+                - idx < size()
+            ensures
+                - returns a const reference to the idx'th element of this object
+        !*/
+
+        bool next_add_accepts (
+        ) const;
+        /*!
+            ensures
+                - if (the next call to add(item) will result in item being included
+                  into *this) then
+                    - returns true
+                    - Note that the next item will always be accepted if size() < max_size().
+                - else
+                    - returns false
+                    - Note that the next item will never be accepted if max_size() == 0.
+        !*/
+
+        void add (
+            const T& new_item
+        );
+        /*!
+            ensures
+                - if (next_add_accepts()) then
+                    - places new_item into *this object at a random location
+                    - if (size() < max_size()) then
+                        - #size() == size() + 1
+                - #next_add_accepts() == The updated information about the acceptance
+                  of the next call to add()
+        !*/
+
+        void add (
+        );
+        /*!
+            requires
+                - next_add_accepts() == false
+            ensures
+                - This function does nothing but update the value of #next_add_accepts()
+        !*/
+
+        iterator begin(
+        );
+        /*!
+            ensures
+                - if (size() > 0) then
+                    - returns an iterator referring to the first element in 
+                      this container.
+                - else
+                    - returns end()
+        !*/
+        
+        const_iterator begin(
+        ) const;
+        /*!
+            ensures
+                - if (size() > 0) then
+                    - returns a const_iterator referring to the first element in 
+                      this container.
+                - else
+                    - returns end()
+        !*/
+
+        iterator end(
+        ); 
+        /*!
+            ensures
+                - returns an iterator that represents one past the end of
+                  this container
+        !*/
+
+        const_iterator end(
+        ) const;
+        /*!
+            ensures
+                - returns an iterator that represents one past the end of
+                  this container
+        !*/
+
+        const std::vector<T>& to_std_vector(
+        ) const;
+        /*!
+            ensures
+                - returns a const reference to the underlying std::vector<T> that contains
+                  all elements in this object.  That is, this function returns a vector, V,
+                  which has the following properties:  
+                    - V.size()  == this->size()
+                    - V.begin() == this->begin()
+                    - V.end()   == this->end()
+        !*/
+
+        void swap (
+            random_subset_selector& item
+        );
+        /*!
+            ensures
+                - swaps *this and item
+        !*/
+
+    };
+
+    template <
+        typename T,
+        typename rand_type 
+        >
+    void swap (
+        random_subset_selector<T,rand_type>& a,
+        random_subset_selector<T,rand_type>& b
+    ) { a.swap(b); }
+    /*!
+        provides global swap support
+    !*/
+
+    template <
+        typename T,
+        typename rand_type 
+        >
+    void serialize (
+        const random_subset_selector<T,rand_type>& item,
+        std::ostream& out 
+    );   
+    /*!
+        provides serialization support 
+    !*/
+
+    template <
+        typename T,
+        typename rand_type 
+        >
+    void deserialize (
+        random_subset_selector<T,rand_type>& item,
+        std::istream& in
+    );   
+    /*!
+        provides deserialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename alloc
+        >
+    random_subset_selector<T> randomly_subsample (
+        const std::vector<T,alloc>& samples,
+        unsigned long num
+    );
+    /*!
+        ensures
+            - returns a random subset R such that:
+                - R contains a random subset of the given samples
+                - R.size() == min(num, samples.size())
+                - R.max_size() == num
+            - The random number generator used by this function will always be
+              initialized in the same way.  I.e. this function will always pick
+              the same random subsample if called multiple times.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename alloc,
+        typename U
+        >
+    random_subset_selector<T> randomly_subsample (
+        const std::vector<T,alloc>& samples,
+        unsigned long num,
+        const U& random_seed
+    );
+    /*!
+        requires
+            - random_seed must be convertible to a string by dlib::cast_to_string()
+        ensures
+            - returns a random subset R such that:
+                - R contains a random subset of the given samples
+                - R.size() == min(num, samples.size())
+                - R.max_size() == num
+            - The given random_seed will be used to initialize the random number
+              generator used by this function.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    random_subset_selector<T> randomly_subsample (
+        const random_subset_selector<T>& samples,
+        unsigned long num
+    );
+    /*!
+        ensures
+            - returns a random subset R such that:
+                - R contains a random subset of the given samples
+                - R.size() == min(num, samples.size())
+                - R.max_size() == num
+            - The random number generator used by this function will always be
+              initialized in the same way.  I.e. this function will always pick
+              the same random subsample if called multiple times.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename U
+        >
+    random_subset_selector<T> randomly_subsample (
+        const random_subset_selector<T>& samples,
+        unsigned long num,
+        const U& random_seed
+    );
+    /*!
+        requires
+            - random_seed must be convertible to a string by dlib::cast_to_string()
+        ensures
+            - returns a random subset R such that:
+                - R contains a random subset of the given samples
+                - R.size() == min(num, samples.size())
+                - R.max_size() == num
+            - The given random_seed will be used to initialize the random number
+              generator used by this function.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    const matrix_exp mat (
+        const random_subset_selector<T>& m 
+    );
+    /*!
+        ensures
+            - returns a matrix R such that:
+                - is_col_vector(R) == true 
+                - R.size() == m.size()
+                - for all valid r:
+                  R(r) == m[r]
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_
+
diff --git a/ml/dlib/dlib/statistics/running_gradient.h b/ml/dlib/dlib/statistics/running_gradient.h
new file mode 100644
index 000000000..d3f1b3ddf
--- /dev/null
+++ b/ml/dlib/dlib/statistics/running_gradient.h
@@ -0,0 +1,370 @@
+// Copyright (C) 2016  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_RuNNING_GRADIENT_Hh_
+#define DLIB_RuNNING_GRADIENT_Hh_
+
+#include "running_gradient_abstract.h"
+#include "../algs.h"
+#include "../serialize.h"
+#include <cmath>
+#include "../matrix.h"
+#include <algorithm>
+
+
+namespace dlib
+{
+    class running_gradient 
+    {
+    public:
+
+        running_gradient (
+        )
+        {
+            clear();
+        }
+
+        void clear(
+        )
+        {
+            n = 0;
+            R = identity_matrix<double>(2)*1e6;
+            w = 0;
+            residual_squared = 0;
+        }
+
+        double current_n (
+        ) const
+        {
+            return n;
+        }
+
+        void add(
+            double y
+        )
+        {
+            matrix<double,2,1> x;
+            x = n, 1;
+
+            // Do recursive least squares computations
+            const double temp = 1 + trans(x)*R*x;
+            matrix<double,2,1> tmp = R*x;
+            R = R - (tmp*trans(tmp))/temp;
+            // R should always be symmetric.  This line improves numeric stability of this algorithm.
+            R = 0.5*(R + trans(R));
+            w = w + R*x*(y - trans(x)*w);
+
+            // Also, recursively keep track of the residual error between the given value
+            // and what our linear predictor outputs.
+            residual_squared = residual_squared + std::pow((y - trans(x)*w),2.0)*temp;
+
+            ++n;
+        }
+
+        double gradient (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\t double running_gradient::gradient()"
+                << "\n\t You must add more values into this object before calling this function."
+                << "\n\t this: " << this
+                );
+
+            return w(0);
+        }
+
+        double intercept (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 0,
+                "\t double running_gradient::intercept()"
+                << "\n\t You must add more values into this object before calling this function."
+                << "\n\t this: " << this
+                );
+
+            return w(1);
+        }
+        double standard_error ( 
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 2,
+                "\t double running_gradient::standard_error()"
+                << "\n\t You must add more values into this object before calling this function."
+                << "\n\t this: " << this
+                );
+
+
+            const double s = residual_squared/(n-2);
+            const double adjust = 12.0/(std::pow(current_n(),3.0) - current_n());
+            return std::sqrt(s*adjust);
+        }
+
+        double probability_gradient_less_than (
+            double thresh
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 2,
+                "\t double running_gradient::probability_gradient_less_than()"
+                << "\n\t You must add more values into this object before calling this function."
+                << "\n\t this: " << this
+                );
+
+            return normal_cdf(thresh, gradient(), standard_error());
+        }
+
+        double probability_gradient_greater_than (
+            double thresh
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 2,
+                "\t double running_gradient::probability_gradient_greater_than()"
+                << "\n\t You must add more values into this object before calling this function."
+                << "\n\t this: " << this
+                );
+
+            return 1-probability_gradient_less_than(thresh);
+        }
+
+        friend void serialize (const running_gradient& item, std::ostream& out)
+        {
+            int version = 1;
+            serialize(version, out);
+            serialize(item.n, out);
+            serialize(item.R, out);
+            serialize(item.w, out);
+            serialize(item.residual_squared, out);
+        }
+
+        friend void deserialize (running_gradient& item, std::istream& in)
+        {
+            int version = 0;
+            deserialize(version, in);
+            if (version != 1)
+                throw serialization_error("Unexpected version found while deserializing dlib::running_gradient.");
+            deserialize(item.n, in);
+            deserialize(item.R, in);
+            deserialize(item.w, in);
+            deserialize(item.residual_squared, in);
+        }
+
+    private:
+
+        static double normal_cdf(double value, double mean, double stddev) 
+        {
+            if (stddev == 0)
+            {
+                if (value < mean)
+                    return 0;
+                else if (value > mean)
+                    return 1;
+                else
+                    return 0.5;
+            }
+            value = (value-mean)/stddev;
+            return 0.5 * std::erfc(-value / std::sqrt(2.0));
+        }
+
+        double n;
+        matrix<double,2,2> R;
+        matrix<double,2,1> w;
+        double residual_squared;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    double probability_gradient_less_than (
+        const T& container,
+        double thresh
+    )
+    {
+        running_gradient g;
+        for(auto&& v : container)
+            g.add(v);
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT(g.current_n() > 2,
+            "\t double probability_gradient_less_than()"
+            << "\n\t You need more than 2 elements in the given container to call this function."
+        );
+        return g.probability_gradient_less_than(thresh);
+    }
+
+    template <
+        typename T
+        >
+    double probability_gradient_greater_than (
+        const T& container,
+        double thresh
+    )
+    {
+        running_gradient g;
+        for(auto&& v : container)
+            g.add(v);
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT(g.current_n() > 2,
+            "\t double probability_gradient_greater_than()"
+            << "\n\t You need more than 2 elements in the given container to call this function."
+        );
+        return g.probability_gradient_greater_than(thresh);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    double find_upper_quantile (
+        const T& container_,
+        double quantile
+    )
+    {
+        DLIB_CASSERT(0 <= quantile && quantile <= 1.0);
+
+        // copy container into a std::vector
+        std::vector<double> container(container_.begin(), container_.end());
+
+        DLIB_CASSERT(container.size() > 0);
+
+        size_t idx_upper = std::round((container.size()-1)*(1-quantile));
+
+        std::nth_element(container.begin(), container.begin()+idx_upper, container.end());
+        auto upper_q = *(container.begin()+idx_upper);
+        return upper_q;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    size_t count_steps_without_decrease (
+        const T& container,
+        double probability_of_decrease = 0.51
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1,
+            "\t size_t count_steps_without_decrease()"
+            << "\n\t probability_of_decrease: "<< probability_of_decrease 
+        );
+
+        running_gradient g;
+        size_t count = 0;
+        size_t j = 0;
+        for (auto i = container.rbegin(); i != container.rend(); ++i)
+        {
+            ++j;
+            g.add(*i);
+            if (g.current_n() > 2)
+            {
+                // Note that this only looks backwards because we are looping over the
+                // container backwards.  So here we are really checking if the gradient isn't
+                // decreasing.
+                double prob_decreasing = g.probability_gradient_greater_than(0);
+                // If we aren't confident things are decreasing.
+                if (prob_decreasing < probability_of_decrease)
+                    count = j;
+            }
+        }
+        return count;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    size_t count_steps_without_decrease_robust (
+        const T& container,
+        double probability_of_decrease = 0.51,
+        double quantile_discard = 0.10
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(0 <= quantile_discard && quantile_discard <= 1);
+        DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1,
+            "\t size_t count_steps_without_decrease_robust()"
+            << "\n\t probability_of_decrease: "<< probability_of_decrease 
+        );
+
+        if (container.size() == 0)
+            return 0;
+
+        const auto quantile_thresh = find_upper_quantile(container, quantile_discard); 
+
+        running_gradient g;
+        size_t count = 0;
+        size_t j = 0;
+        for (auto i = container.rbegin(); i != container.rend(); ++i)
+        {
+            ++j;
+            // ignore values that are too large
+            if (*i <= quantile_thresh)
+                g.add(*i);
+
+            if (g.current_n() > 2)
+            {
+                // Note that this only looks backwards because we are looping over the
+                // container backwards.  So here we are really checking if the gradient isn't
+                // decreasing.
+                double prob_decreasing = g.probability_gradient_greater_than(0);
+                // If we aren't confident things are decreasing.
+                if (prob_decreasing < probability_of_decrease)
+                    count = j;
+            }
+        }
+        return count;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    size_t count_steps_without_increase (
+        const T& container,
+        double probability_of_increase = 0.51
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(0.5 < probability_of_increase && probability_of_increase < 1,
+            "\t size_t count_steps_without_increase()"
+            << "\n\t probability_of_increase: "<< probability_of_increase 
+        );
+
+        running_gradient g;
+        size_t count = 0;
+        size_t j = 0;
+        for (auto i = container.rbegin(); i != container.rend(); ++i)
+        {
+            ++j;
+            g.add(*i);
+            if (g.current_n() > 2)
+            {
+                // Note that this only looks backwards because we are looping over the
+                // container backwards.  So here we are really checking if the gradient isn't
+                // increasing.
+                double prob_increasing = g.probability_gradient_less_than(0);
+                // If we aren't confident things are increasing.
+                if (prob_increasing < probability_of_increase)
+                    count = j;
+            }
+        }
+        return count;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RuNNING_GRADIENT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/running_gradient_abstract.h b/ml/dlib/dlib/statistics/running_gradient_abstract.h
new file mode 100644
index 000000000..a42e1c152
--- /dev/null
+++ b/ml/dlib/dlib/statistics/running_gradient_abstract.h
@@ -0,0 +1,276 @@
+// Copyright (C) 2016  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
+#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
+
+
+namespace dlib
+{
+    class running_gradient 
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for estimating if a noisy sequence of numbers is
+                trending up or down and by how much.  It does this by finding the least
+                squares fit of a line to the data and then allows you to perform a
+                statistical test on the slope of that line.
+        !*/
+
+    public:
+
+        running_gradient (
+        );
+        /*!
+            ensures
+                - #current_n() == 0
+        !*/
+
+        void clear(
+        );
+        /*!
+            ensures
+                - #current_n() == 0
+                - this object has its initial value
+                - clears all memory of any previous data points
+        !*/
+
+        double current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the number of values given to this object by add(). 
+        !*/
+
+        void add(
+            double y
+        );
+        /*!
+            ensures
+                - Updates the gradient() and standard_error() estimates in this object
+                  based on the new y value.
+                - #current_n() == current_n() + 1
+        !*/
+
+        double gradient (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - If we consider the values given to add() as time series data, we can
+                  estimate the rate-of-change of those values.  That is, how much,
+                  typically, do those values change from sample to sample?  The gradient()
+                  function returns the current estimate.  It does this by finding the least
+                  squares fit of a line to the data given to add() and returning the slope
+                  of this line.
+        !*/
+
+        double intercept (
+        ) const;
+        /*!
+            requires
+                - current_n() > 0
+            ensures
+                - This class fits a line to the time series data given to add().  This
+                  function returns the intercept of that line while gradient() returns the
+                  slope of that line.  This means that, for example, the next point that
+                  add() will see, as predicted by this best fit line, is the value
+                  intercept() + current_n()*gradient().
+        !*/
+
+        double standard_error ( 
+        ) const;
+        /*!
+            requires
+                - current_n() > 2
+            ensures
+                - returns the standard deviation of the estimate of gradient(). 
+        !*/
+
+        double probability_gradient_less_than (
+            double thresh
+        ) const;
+        /*!
+            requires
+                - current_n() > 2
+            ensures
+                - If we can assume the values given to add() are linearly related to each
+                  other and corrupted by Gaussian additive noise then our estimate of
+                  gradient() is a random variable with a mean value of gradient() and a
+                  standard deviation of standard_error().  This lets us compute the
+                  probability that the true gradient of the data is less than thresh, which
+                  is what this function returns.
+        !*/
+
+        double probability_gradient_greater_than (
+            double thresh
+        ) const;
+        /*!
+            requires
+                - current_n() > 2
+            ensures
+                - returns 1-probability_gradient_less_than(thresh)
+        !*/
+
+    };
+
+    void serialize (
+        const running_gradient& item, 
+        std::ostream& out 
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+    void deserialize (
+        running_gradient& item, 
+        std::istream& in
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    double probability_gradient_less_than (
+        const T& container,
+        double thresh
+    );
+    /*!
+        requires
+            - container must be a container of double values that can be enumerated with a
+              range based for loop.
+            - The container must contain more than 2 elements.
+        ensures
+            - Puts all the elements of container into a running_gradient object, R, and
+              then returns R.probability_gradient_less_than(thresh). 
+    !*/
+
+    template <
+        typename T
+        >
+    double probability_gradient_greater_than (
+        const T& container,
+        double thresh
+    );
+    /*!
+        requires
+            - container must be a container of double values that can be enumerated with a
+              range based for loop.
+            - The container must contain more than 2 elements.
+        ensures
+            - Puts all the elements of container into a running_gradient object, R, and
+              then returns R.probability_gradient_greater_than(thresh).
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    size_t count_steps_without_decrease (
+        const T& container,
+        double probability_of_decrease = 0.51
+    );
+    /*!
+        requires
+            - container must be a container of double values that can be enumerated with
+              .rbegin() and .rend().
+            - 0.5 < probability_of_decrease < 1
+        ensures
+            - If you think of the contents of container as a potentially noisy time series,
+              then this function returns a count of how long the time series has gone
+              without noticeably decreasing in value.  It does this by adding the
+              elements into a running_gradient object and counting how many elements,
+              starting with container.back(), that you need to examine before you are
+              confident that the series has been decreasing in value.  Here, "confident of
+              decrease" means that the probability of decrease is >= probability_of_decrease.  
+            - Setting probability_of_decrease to 0.51 means we count until we see even a
+              small hint of decrease, whereas a larger value of 0.99 would return a larger
+              count since it keeps going until it is nearly certain the time series is
+              decreasing.
+            - The max possible output from this function is container.size().
+    !*/
+
+    template <
+        typename T
+        > 
+    size_t count_steps_without_decrease_robust (
+        const T& container,
+        double probability_of_decrease = 0.51,
+        double quantile_discard = 0.10
+    );
+    /*!
+        requires
+            - container must be a container of double values that can be enumerated with
+              .begin() and .end() as well as .rbegin() and .rend().
+            - 0.5 < probability_of_decrease < 1
+            - 0 <= quantile_discard <= 1
+        ensures
+            - This function behaves just like
+              count_steps_without_decrease(container,probability_of_decrease) except that
+              it ignores values in container that are in the upper quantile_discard
+              quantile.  So for example, if the quantile discard is 0.1 then the 10%
+              largest values in container are ignored.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    size_t count_steps_without_increase (
+        const T& container,
+        double probability_of_increase = 0.51
+    );
+    /*!
+        requires
+            - container must be a container of double values that can be enumerated with
+              .rbegin() and .rend().
+            - 0.5 < probability_of_increase < 1
+        ensures
+            - If you think of the contents of container as a potentially noisy time series,
+              then this function returns a count of how long the time series has gone
+              without noticeably increasing in value.  It does this by adding the
+              elements into a running_gradient object and counting how many elements,
+              starting with container.back(), that you need to examine before you are
+              confident that the series has been increasing in value.  Here, "confident of
+              increase" means that the probability of increase is >= probability_of_increase.  
+            - Setting probability_of_increase to 0.51 means we count until we see even a
+              small hint of increase, whereas a larger value of 0.99 would return a larger
+              count since it keeps going until it is nearly certain the time series is
+              increasing.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        > 
+    double find_upper_quantile (
+        const T& container,
+        double quantile
+    );
+    /*!
+        requires
+            - container must be a container of double values that can be enumerated with
+              .begin() and .end().
+            - 0 <= quantile <= 1
+            - container.size() > 0
+        ensures
+            - Finds and returns the value such that quantile percent of the values in
+              container are greater than it.  For example, 0.5 would find the median value
+              in container while 0.1 would find the value that lower bounded the 10%
+              largest values in container.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/sammon.h b/ml/dlib/dlib/statistics/sammon.h
new file mode 100644
index 000000000..1a3eb72a1
--- /dev/null
+++ b/ml/dlib/dlib/statistics/sammon.h
@@ -0,0 +1,269 @@
+// Copyright (C) 2012  Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_SAMMoN_Hh_
+#define DLIB_SAMMoN_Hh_
+
+#include "sammon_abstract.h"
+#include "../matrix.h"
+#include "../algs.h"
+#include "dpca.h"
+#include <vector>
+
+namespace dlib
+{
+
+    class sammon_projection
+    {
+
+    public:
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename matrix_type>
+        std::vector<matrix<double,0,1> > operator() (
+            const std::vector<matrix_type>& data,       
+            const long num_dims                      
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(num_dims > 0,
+                "\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
+                << "\n\t Invalid inputs were given to this function."
+                << "\n\t num_dims:    " << num_dims
+                );
+            std::vector<matrix<double,0,1> > result;    // projections
+            if (data.size() == 0)
+            {
+                return result;
+            }
+
+#ifdef ENABLE_ASSERTS
+            DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(),
+                "\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
+                << "\n\t Invalid inputs were given to this function."
+                << "\n\t data.size():    " << data.size()
+                << "\n\t num_dims:       " << num_dims
+                << "\n\t data[0].size(): " << data[0].size() 
+                );
+            for (unsigned long i = 0; i < data.size(); ++i)
+            {
+                DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(),
+                        "\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
+                        << "\n\t Invalid inputs were given to this function."
+                        << "\n\t data["<<i<<"].size():    " << data[i].size()
+                        << "\n\t data[0].size(): " << data[0].size() 
+                        << "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i])
+                );
+            }
+#endif
+
+            double err;                                 // error (discarded)
+            do_sammon_projection(data, num_dims, result, err);
+            return result;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename matrix_type>
+        void operator() (
+            const std::vector<matrix_type>& data,       
+            const long num_dims,                     
+            std::vector<matrix<double,0,1> >& result,   
+            double &err,                                
+            const unsigned long num_iters = 1000,             
+            const double err_delta = 1.0e-9            
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(num_dims > 0 && num_iters > 0 && err_delta > 0.0,
+                "\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
+                << "\n\t Invalid inputs were given to this function."
+                << "\n\t data.size(): " << data.size()
+                << "\n\t num_dims:    " << num_dims
+                << "\n\t num_iters:   " << num_iters
+                << "\n\t err_delta:   " << err_delta
+                );
+            if (data.size() == 0)
+            {
+                result.clear();
+                err = 0;
+                return;
+            }
+
+#ifdef ENABLE_ASSERTS
+            DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(),
+                "\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
+                << "\n\t Invalid inputs were given to this function."
+                << "\n\t data.size():    " << data.size()
+                << "\n\t num_dims:       " << num_dims
+                << "\n\t data[0].size(): " << data[0].size() 
+                );
+            for (unsigned long i = 0; i < data.size(); ++i)
+            {
+                DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(),
+                        "\t std::vector<matrix<double,0,1> > sammon_projection::operator()"
+                        << "\n\t Invalid inputs were given to this function."
+                        << "\n\t data["<<i<<"].size():    " << data[i].size()
+                        << "\n\t data[0].size(): " << data[0].size() 
+                        << "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i])
+                );
+            }
+#endif
+
+            do_sammon_projection(data, num_dims, result, err, num_iters, err_delta);
+        }
+
+        // ----------------------------------------------------------------------------------------
+        // ----------------------------------------------------------------------------------------
+
+    private:
+
+        void compute_relative_distances(
+            matrix<double,0,1>& dist,                   // relative distances (output)
+            matrix<double,0,0>& data,                   // input data (matrix whose columns are the input vectors)
+            double eps_ratio = 1.0e-7                   // to compute the minimum distance eps
+        )
+        /*!
+            requires
+                - dist.nc() == comb( data.nc(), 2 ), preallocated
+                - eps_ratio > 0
+            ensures
+                - dist[k] == lenght(data[i] - data[j]) for k = j(j-1)/2 + i
+        !*/
+        {
+            const long N = data.nc();                   // num of points
+            double eps;                                 // minimum distance, forced to avoid vectors collision
+                                                        // computed at runtime as eps_ration * mean(vectors distances)
+            for (int k = 0, i = 1; i < N; ++i)
+                for (int j = 0; j < i; ++j)
+                    dist(k++) = length(colm(data, i) - colm(data, j));
+
+            eps = eps_ratio * mean(dist);
+            dist = lowerbound(dist, eps);
+        }
+
+        // ----------------------------------------------------------------------------------------
+
+        template <typename matrix_type>
+        void do_sammon_projection(
+            const std::vector<matrix_type>& data,       // input data
+            unsigned long num_dims,                     // dimension of the reduced space
+            std::vector<matrix<double,0,1> >& result,   // projections (output)
+            double &err,                                // error (output)
+            unsigned long num_iters = 1000,             // max num of iterations: stop condition
+            const double err_delta = 1.0e-9             // delta error: stop condition
+        )
+        /*!
+            requires
+                - matrix_type should be a kind of dlib::matrix<double,N,1>
+                - num_dims > 0
+                - num_iters > 0
+                - err_delta > 0
+            ensures
+                - result == a set of matrix<double,num_dims,1> objects that represent
+                  the Sammon's projections of data vectors.
+                - err == the estimated error done in the projection, with the extra
+                  property that err(at previous iteration) - err < err_delta
+        !*/
+        {
+            // other params
+            const double mf = 0.3;                      // magic factor
+
+            matrix<double> mdata;                // input data as matrix
+            matrix<double> projs;                // projected vectors, i.e. output data as matrix
+
+            // std::vector<matrix> -> matrix
+            mdata.set_size(data[0].size(), data.size());
+            for (unsigned int i = 0; i < data.size(); i++)
+                set_colm(mdata, i) = data[i];
+
+            const long N = mdata.nc();           // num of points
+            const long d = num_dims;             // size of the reduced space
+            const long nd = N * (N - 1) / 2;     // num of pairs of points = size of the distances vectors
+
+            matrix<double, 0, 1> dsij, inv_dsij; // d*_ij: pair-wise distances in the input space (and inverses)
+            dsij.set_size(nd, 1);
+            inv_dsij.set_size(nd, 1);
+            double ic; // 1.0 / sum of dsij
+
+            matrix<double, 0, 1> dij;            // d_ij: pair-wise distances in the reduced space
+            dij.set_size(nd, 1);
+
+            matrix<double, 0, 0> dE, dE2, dtemp; // matrices representing error partial derivatives
+            dE.set_size(d, N);
+            dE2.set_size(d, N);
+            dtemp.set_size(d, N);
+
+            matrix<double, 0, 1> inv_dij, alpha; // utility vectors used to compute the partial derivatives
+            inv_dij.set_size(N, 1);              // inv_dij is 1.0/dij, but we only need it column-wise
+            alpha.set_size(N, 1);                // (slightly wasting a bit of computation)
+            // alpha = 1.0/dij - 1.0/dsij, again column-wise
+
+
+            // initialize projs with PCA
+            discriminant_pca<matrix<double> > dpca;
+            for (int i = 0; i < mdata.nc(); ++i)
+            {
+                dpca.add_to_total_variance(colm(mdata, i));
+            }
+            matrix<double> mat = dpca.dpca_matrix_of_size(num_dims);
+            projs = mat * mdata;
+
+            // compute dsij, inv_dsij and ic
+            compute_relative_distances(dsij, mdata);
+            inv_dsij = 1.0 / dsij;
+            ic = 1.0 / sum(dsij);
+
+            // compute dij and err
+            compute_relative_distances(dij, projs);
+            err = ic * sum(pointwise_multiply(squared(dij - dsij), inv_dsij));
+
+            // start iterating
+            while (num_iters--)
+            {
+                // compute dE, dE2 progressively column by column
+                for (int p = 0; p < N; ++p)
+                {
+                    // compute
+                    // - alpha_p, the column vector with 1/d_pj - 1/d*_pj
+                    // - dtemp, the matrix with the p-th column repeated all along
+                    //TODO: optimize constructions
+                    for (int i = 0; i < N; ++i)
+                    {
+                        int pos = (i < p) ? p * (p - 1) / 2 + i : i * (i - 1) / 2 + p;
+                        inv_dij(i) = (i == p) ? 0.0 : 1.0 / dij(pos);
+                        alpha(i) = (i == p) ? 0.0 : inv_dij(i) - inv_dsij(pos);
+                        set_colm(dtemp, i) = colm(projs, p);
+                    }
+
+                    dtemp -= projs;
+                    set_colm(dE, p) = dtemp * alpha;
+
+                    double sum_alpha = sum(alpha);
+                    set_colm(dE2, p) = abs( sum_alpha + squared(dtemp) * cubed(inv_dij) );
+                }
+
+
+                // compute the update projections
+                projs += pointwise_multiply(dE, mf * reciprocal(dE2));
+
+                // compute new dij and error
+                compute_relative_distances(dij, projs);
+                double err_new = ic * sum( pointwise_multiply(squared(dij - dsij), inv_dsij) );
+                if (err - err_new < err_delta)
+                    break;
+                err = err_new;
+            }
+
+            // matrix -> std::vector<matrix>
+            result.clear();
+            for (int i = 0; i < projs.nc(); ++i)
+                result.push_back(colm(projs, i));
+        }
+
+    };
+
+} // namespace dlib
+
+#endif // DLIB_SAMMoN_Hh_
+
diff --git a/ml/dlib/dlib/statistics/sammon_abstract.h b/ml/dlib/dlib/statistics/sammon_abstract.h
new file mode 100644
index 000000000..0e009729c
--- /dev/null
+++ b/ml/dlib/dlib/statistics/sammon_abstract.h
@@ -0,0 +1,117 @@
+// Copyright (C) 2012  Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SAMMoN_ABSTRACT_Hh_
+#ifdef DLIB_SAMMoN_ABSTRACT_Hh_
+
+#include "../matrix/matrix_abstract.h"
+#include <vector>
+
+namespace dlib
+{
+
+    class sammon_projection
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This is a function object that computes the Sammon projection of a set
+                of N points in a L-dimensional vector space onto a d-dimensional space
+                (d < L), according to the paper:
+                    A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon
+
+                The current implementation is a vectorized version of the original algorithm.
+        !*/
+
+    public:
+
+        sammon_projection(
+        );
+        /*!
+            ensures
+                - this object is properly initialized 
+        !*/
+
+        template <typename matrix_type>
+        std::vector<matrix<double,0,1> > operator() (
+            const std::vector<matrix_type>& data,       
+            const long num_dims                      
+        );
+        /*!
+            requires
+                - num_dims > 0
+                - matrix_type should be a kind of dlib::matrix of doubles capable
+                  of representing column vectors.
+                - for all valid i:
+                    - is_col_vector(data[i]) == true
+                    - data[0].size() == data[i].size()
+                      (i.e. all the vectors in data must have the same dimensionality)
+                - if (data.size() != 0) then
+                    - 0 < num_dims <= data[0].size()
+                      (i.e. you can't project into a higher dimension than the input data,
+                      only to a lower dimension.)
+            ensures
+                - This routine computes Sammon's dimensionality reduction method based on the
+                  given input data.  It will attempt to project the contents of data into a
+                  num_dims dimensional space that preserves relative distances between the
+                  input data points.
+                - This function returns a std::vector, OUT, such that:
+                    - OUT == a set of column vectors that represent the Sammon projection of 
+                      the input data vectors. 
+                    - OUT.size() == data.size()
+                    - for all valid i:
+                        - OUT[i].size() == num_dims
+                        - OUT[i] == the Sammon projection of the input vector data[i]
+        !*/
+
+        template <typename matrix_type>
+        void operator() (
+            const std::vector<matrix_type>& data,       
+            const long num_dims,                     
+            std::vector<matrix<double,0,1> >& result,   
+            double &err,                                
+            const unsigned long num_iters = 1000,             
+            const double err_delta = 1.0e-9            
+        );
+        /*!
+            requires
+                - num_iters > 0
+                - err_delta > 0
+                - num_dims > 0
+                - matrix_type should be a kind of dlib::matrix of doubles capable
+                  of representing column vectors.
+                - for all valid i:
+                    - is_col_vector(data[i]) == true
+                    - data[0].size() == data[i].size()
+                      (i.e. all the vectors in data must have the same dimensionality)
+                - if (data.size() != 0) then
+                    - 0 < num_dims <= data[0].size()
+                      (i.e. you can't project into a higher dimension than the input data,
+                      only to a lower dimension.)
+            ensures
+                - This routine computes Sammon's dimensionality reduction method based on the
+                  given input data.  It will attempt to project the contents of data into a
+                  num_dims dimensional space that preserves relative distances between the
+                  input data points.
+                - #err == the final error value at the end of the algorithm.  The goal of Sammon's
+                  algorithm is to find a lower dimensional projection of the input data that
+                  preserves the relative distances between points.  The value in #err is a measure
+                  of the total error at the end of the algorithm.  So smaller values indicate
+                  a better projection was found than if a large value is returned via #err.
+                - Sammon's algorithm will run until either num_iters iterations has executed
+                  or the change in error from one iteration to the next is less than err_delta.
+                - Upon completion, the output of Sammon's projection is stored into #result, in
+                  particular, we will have:
+                    - #result == a set of column vectors that represent the Sammon projection of 
+                      the input data vectors. 
+                    - #result.size() == data.size()
+                    - for all valid i:
+                        - #result[i].size() == num_dims
+                        - #result[i] == the Sammon projection of the input vector data[i]
+        !*/
+
+    };
+
+} 
+
+#endif // DLIB_SAMMoN_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/statistics/statistics.h b/ml/dlib/dlib/statistics/statistics.h
new file mode 100644
index 000000000..9dee7006b
--- /dev/null
+++ b/ml/dlib/dlib/statistics/statistics.h
@@ -0,0 +1,1890 @@
+// Copyright (C) 2008  Davis E. King (davis@dlib.net), Steve Taylor
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_STATISTICs_
+#define DLIB_STATISTICs_
+
+#include "statistics_abstract.h"
+#include <limits>
+#include <cmath>
+#include "../algs.h"
+#include "../matrix.h"
+#include "../sparse_vector.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_stats
+    {
+    public:
+
+        running_stats()
+        {
+            clear();
+
+            COMPILE_TIME_ASSERT ((
+                    is_same_type<float,T>::value ||
+                    is_same_type<double,T>::value ||
+                    is_same_type<long double,T>::value 
+            ));
+        }
+
+        void clear()
+        {
+            sum = 0;
+            sum_sqr  = 0;
+            sum_cub  = 0;
+            sum_four = 0;
+
+            n = 0;
+            min_value = std::numeric_limits<T>::infinity();
+            max_value = -std::numeric_limits<T>::infinity();
+        }
+
+        void add (
+            const T& val
+        )
+        {
+            sum      += val;
+            sum_sqr  += val*val;
+            sum_cub  += cubed(val);
+            sum_four += quaded(val);
+
+            if (val < min_value)
+                min_value = val;
+            if (val > max_value)
+                max_value = val;
+
+            ++n;
+        }
+
+        T current_n (
+        ) const
+        {
+            return n;
+        }
+
+        T mean (
+        ) const
+        {
+            if (n != 0)
+                return sum/n;
+            else
+                return 0;
+        }
+
+        T max (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 0,
+                "\tT running_stats::max"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return max_value;
+        }
+
+        T min (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 0,
+                "\tT running_stats::min"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return min_value;
+        }
+
+        T variance (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_stats::variance"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = 1/(n-1);
+            temp = temp*(sum_sqr - sum*sum/n);
+            // make sure the variance is never negative.  This might
+            // happen due to numerical errors.
+            if (temp >= 0)
+                return temp;
+            else
+                return 0;
+        }
+
+        T stddev (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_stats::stddev"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return std::sqrt(variance());
+        }
+
+        T skewness (
+        ) const
+        {  
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 2,
+                "\tT running_stats::skewness"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+            );
+
+            T temp  = 1/n;
+            T temp1 = std::sqrt(n*(n-1))/(n-2); 
+            temp    = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/
+                      (std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3)));
+
+            return temp; 
+        }
+
+        T ex_kurtosis (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 3,
+                "\tT running_stats::kurtosis"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+            );
+
+            T temp = 1/n;
+            T m4   = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp
+                     -3*quaded(sum)*cubed(temp));
+            T m2   = temp*(sum_sqr-sum*sum*temp);
+            temp   = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3));
+
+            return temp; 
+        }
+
+        T scale (
+            const T& val
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_stats::variance"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+            return (val-mean())/std::sqrt(variance());
+        }
+
+        running_stats operator+ (
+            const running_stats& rhs
+        ) const
+        {
+            running_stats temp(*this);
+
+            temp.sum += rhs.sum;
+            temp.sum_sqr += rhs.sum_sqr;
+            temp.sum_cub += rhs.sum_cub;
+            temp.sum_four += rhs.sum_four;
+            temp.n += rhs.n;
+            temp.min_value = std::min(rhs.min_value, min_value);
+            temp.max_value = std::max(rhs.max_value, max_value);
+            return temp;
+        }
+
+        template <typename U>
+        friend void serialize (
+            const running_stats<U>& item, 
+            std::ostream& out 
+        );
+
+        template <typename U>
+        friend void deserialize (
+            running_stats<U>& item, 
+            std::istream& in
+        ); 
+
+    private:
+        T sum;
+        T sum_sqr;
+        T sum_cub;
+        T sum_four;
+        T n;
+        T min_value;
+        T max_value;
+    
+        T cubed  (const T& val) const {return val*val*val; }
+        T quaded (const T& val) const {return val*val*val*val; }
+    };
+
+    template <typename T>
+    void serialize (
+        const running_stats<T>& item, 
+        std::ostream& out 
+    )
+    {
+        int version = 2;
+        serialize(version, out);
+
+        serialize(item.sum, out);
+        serialize(item.sum_sqr, out);
+        serialize(item.sum_cub, out);
+        serialize(item.sum_four, out);
+        serialize(item.n, out);
+        serialize(item.min_value, out);
+        serialize(item.max_value, out);
+    }
+
+    template <typename T>
+    void deserialize (
+        running_stats<T>& item, 
+        std::istream& in
+    ) 
+    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 2)
+            throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object.");
+
+        deserialize(item.sum, in);
+        deserialize(item.sum_sqr, in);
+        deserialize(item.sum_cub, in);
+        deserialize(item.sum_four, in);
+        deserialize(item.n, in);
+        deserialize(item.min_value, in);
+        deserialize(item.max_value, in);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_scalar_covariance
+    {
+    public:
+
+        running_scalar_covariance()
+        {
+            clear();
+
+            COMPILE_TIME_ASSERT ((
+                    is_same_type<float,T>::value ||
+                    is_same_type<double,T>::value ||
+                    is_same_type<long double,T>::value 
+            ));
+        }
+
+        void clear()
+        {
+            sum_xy = 0;
+            sum_x = 0;
+            sum_y = 0;
+            sum_xx = 0;
+            sum_yy = 0;
+            n = 0;
+        }
+
+        void add (
+            const T& x,
+            const T& y
+        )
+        {
+            sum_xy += x*y;
+
+            sum_xx += x*x;
+            sum_yy += y*y;
+
+            sum_x  += x;
+            sum_y  += y;
+
+            n += 1;
+        }
+
+        T current_n (
+        ) const
+        {
+            return n;
+        }
+
+        T mean_x (
+        ) const
+        {
+            if (n != 0)
+                return sum_x/n;
+            else
+                return 0;
+        }
+
+        T mean_y (
+        ) const
+        {
+            if (n != 0)
+                return sum_y/n;
+            else
+                return 0;
+        }
+
+        T covariance (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance::covariance()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return 1/(n-1) * (sum_xy - sum_y*sum_x/n);
+        }
+
+        T correlation (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance::correlation()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return covariance() / std::sqrt(variance_x()*variance_y());
+        }
+
+        T variance_x (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance::variance_x()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n);
+            // make sure the variance is never negative.  This might
+            // happen due to numerical errors.
+            if (temp >= 0)
+                return temp;
+            else
+                return 0;
+        }
+
+        T variance_y (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance::variance_y()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n);
+            // make sure the variance is never negative.  This might
+            // happen due to numerical errors.
+            if (temp >= 0)
+                return temp;
+            else
+                return 0;
+        }
+
+        T stddev_x (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance::stddev_x()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return std::sqrt(variance_x());
+        }
+
+        T stddev_y (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance::stddev_y()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return std::sqrt(variance_y());
+        }
+
+        running_scalar_covariance operator+ (
+            const running_scalar_covariance& rhs
+        ) const
+        {
+            running_scalar_covariance temp(rhs);
+
+            temp.sum_xy += sum_xy;
+            temp.sum_x  += sum_x;
+            temp.sum_y  += sum_y;
+            temp.sum_xx += sum_xx;
+            temp.sum_yy += sum_yy;
+            temp.n      += n;
+            return temp;
+        }
+
+    private:
+
+        T sum_xy;
+        T sum_x;
+        T sum_y;
+        T sum_xx;
+        T sum_yy;
+        T n;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_scalar_covariance_decayed
+    {
+    public:
+
+        explicit running_scalar_covariance_decayed(
+            T decay_halflife = 1000 
+        )
+        {
+            DLIB_ASSERT(decay_halflife > 0);
+
+            sum_xy = 0;
+            sum_x = 0;
+            sum_y = 0;
+            sum_xx = 0;
+            sum_yy = 0;
+            forget = std::pow(0.5, 1/decay_halflife);
+            n = 0;
+
+            COMPILE_TIME_ASSERT ((
+                    is_same_type<float,T>::value ||
+                    is_same_type<double,T>::value ||
+                    is_same_type<long double,T>::value 
+            ));
+        }
+
+        T forget_factor (
+        ) const 
+        { 
+            return forget; 
+        }
+
+        void add (
+            const T& x,
+            const T& y
+        )
+        {
+            sum_xy = sum_xy*forget + x*y;
+
+            sum_xx = sum_xx*forget + x*x;
+            sum_yy = sum_yy*forget + y*y;
+
+            sum_x  = sum_x*forget + x;
+            sum_y  = sum_y*forget + y;
+
+            n = n*forget + 1;
+        }
+
+        T current_n (
+        ) const
+        {
+            return n;
+        }
+
+        T mean_x (
+        ) const
+        {
+            if (n != 0)
+                return sum_x/n;
+            else
+                return 0;
+        }
+
+        T mean_y (
+        ) const
+        {
+            if (n != 0)
+                return sum_y/n;
+            else
+                return 0;
+        }
+
+        T covariance (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance_decayed::covariance()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return 1/(n-1) * (sum_xy - sum_y*sum_x/n);
+        }
+
+        T correlation (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance_decayed::correlation()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = std::sqrt(variance_x()*variance_y());
+            if (temp != 0)
+                return covariance() / temp;
+            else
+                return 0; // just say it's zero if there isn't any variance in x or y.
+        }
+
+        T variance_x (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance_decayed::variance_x()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n);
+            // make sure the variance is never negative.  This might
+            // happen due to numerical errors.
+            if (temp >= 0)
+                return temp;
+            else
+                return 0;
+        }
+
+        T variance_y (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance_decayed::variance_y()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n);
+            // make sure the variance is never negative.  This might
+            // happen due to numerical errors.
+            if (temp >= 0)
+                return temp;
+            else
+                return 0;
+        }
+
+        T stddev_x (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance_decayed::stddev_x()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return std::sqrt(variance_x());
+        }
+
+        T stddev_y (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_scalar_covariance_decayed::stddev_y()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return std::sqrt(variance_y());
+        }
+
+    private:
+
+        T sum_xy;
+        T sum_x;
+        T sum_y;
+        T sum_xx;
+        T sum_yy;
+        T n;
+        T forget;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_stats_decayed
+    {
+    public:
+
+        explicit running_stats_decayed(
+            T decay_halflife = 1000 
+        )
+        {
+            DLIB_ASSERT(decay_halflife > 0);
+
+            sum_x = 0;
+            sum_xx = 0;
+            forget = std::pow(0.5, 1/decay_halflife);
+            n = 0;
+
+            COMPILE_TIME_ASSERT ((
+                    is_same_type<float,T>::value ||
+                    is_same_type<double,T>::value ||
+                    is_same_type<long double,T>::value 
+            ));
+        }
+
+        T forget_factor (
+        ) const 
+        { 
+            return forget; 
+        }
+
+        void add (
+            const T& x
+        )
+        {
+
+            sum_xx = sum_xx*forget + x*x;
+
+            sum_x  = sum_x*forget + x;
+
+            n = n*forget + 1;
+        }
+
+        T current_n (
+        ) const
+        {
+            return n;
+        }
+
+        T mean (
+        ) const
+        {
+            if (n != 0)
+                return sum_x/n;
+            else
+                return 0;
+        }
+
+        T variance (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_stats_decayed::variance()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n);
+            // make sure the variance is never negative.  This might
+            // happen due to numerical errors.
+            if (temp >= 0)
+                return temp;
+            else
+                return 0;
+        }
+
+        T stddev (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(current_n() > 1,
+                "\tT running_stats_decayed::stddev()"
+                << "\n\tyou have to add some numbers to this object first"
+                << "\n\tthis: " << this
+                );
+
+            return std::sqrt(variance());
+        }
+
+        template <typename U>
+        friend void serialize (
+            const running_stats_decayed<U>& item, 
+            std::ostream& out 
+        );
+
+        template <typename U>
+        friend void deserialize (
+            running_stats_decayed<U>& item, 
+            std::istream& in
+        ); 
+
+    private:
+
+        T sum_x;
+        T sum_xx;
+        T n;
+        T forget;
+    };
+
+    template <typename T>
+    void serialize (
+        const running_stats_decayed<T>& item, 
+        std::ostream& out 
+    )
+    {
+        int version = 1;
+        serialize(version, out);
+
+        serialize(item.sum_x, out);
+        serialize(item.sum_xx, out);
+        serialize(item.n, out);
+        serialize(item.forget, out);
+    }
+
+    template <typename T>
+    void deserialize (
+        running_stats_decayed<T>& item, 
+        std::istream& in
+    ) 
+    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 1)
+            throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats_decayed object.");
+
+        deserialize(item.sum_x, in);
+        deserialize(item.sum_xx, in);
+        deserialize(item.n, in);
+        deserialize(item.forget, in);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double mean_sign_agreement (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(a.size() == b.size(),
+                    "\t double mean_sign_agreement(a,b)"
+                    << "\n\t a and b must be the same length."
+                    << "\n\t a.size(): " << a.size()
+                    << "\n\t b.size(): " << b.size()
+        );
+
+        
+        double temp = 0;
+        for (unsigned long i = 0; i < a.size(); ++i)
+        {
+            if ((a[i] >= 0 && b[i] >= 0) ||
+                (a[i] < 0  && b[i] <  0))
+            {
+                temp += 1;
+            }
+        }
+
+        return temp/a.size();
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double correlation (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(a.size() == b.size() && a.size() > 1,
+                    "\t double correlation(a,b)"
+                    << "\n\t a and b must be the same length and have more than one element."
+                    << "\n\t a.size(): " << a.size()
+                    << "\n\t b.size(): " << b.size()
+        );
+
+        running_scalar_covariance<double> rs;
+        for (unsigned long i = 0; i < a.size(); ++i)
+        {
+            rs.add(a[i], b[i]);
+        }
+        return rs.correlation();
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double covariance (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(a.size() == b.size() && a.size() > 1,
+                    "\t double covariance(a,b)"
+                    << "\n\t a and b must be the same length and have more than one element."
+                    << "\n\t a.size(): " << a.size()
+                    << "\n\t b.size(): " << b.size()
+        );
+
+        running_scalar_covariance<double> rs;
+        for (unsigned long i = 0; i < a.size(); ++i)
+        {
+            rs.add(a[i], b[i]);
+        }
+        return rs.covariance();
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double r_squared (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(a.size() == b.size() && a.size() > 1,
+                    "\t double r_squared(a,b)"
+                    << "\n\t a and b must be the same length and have more than one element."
+                    << "\n\t a.size(): " << a.size()
+                    << "\n\t b.size(): " << b.size()
+        );
+
+        return std::pow(correlation(a,b),2.0);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double mean_squared_error (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    )
+    {
+        // make sure requires clause is not broken
+        DLIB_ASSERT(a.size() == b.size(),
+                    "\t double mean_squared_error(a,b)"
+                    << "\n\t a and b must be the same length."
+                    << "\n\t a.size(): " << a.size()
+                    << "\n\t b.size(): " << b.size()
+        );
+
+        return mean(squared(matrix_cast<double>(mat(a))-matrix_cast<double>(mat(b))));
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class running_covariance
+    {
+        /*!
+            INITIAL VALUE
+                - vect_size == 0
+                - total_count == 0
+
+            CONVENTION
+                - vect_size == in_vector_size()
+                - total_count == current_n() 
+
+                - if (total_count != 0)
+                    - total_sum == the sum of all vectors given to add()
+                    - the covariance of all the elements given to add() is given
+                      by:
+                        - let avg == total_sum/total_count
+                        - covariance == total_cov/total_count - avg*trans(avg)
+        !*/
+    public:
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        running_covariance(
+        )
+        {
+            clear();
+        }
+
+        void clear(
+        )
+        {
+            total_count = 0;
+
+            vect_size = 0;
+
+            total_sum.set_size(0);
+            total_cov.set_size(0,0);
+        }
+
+        long in_vector_size (
+        ) const
+        {
+            return vect_size;
+        }
+
+        long current_n (
+        ) const
+        {
+            return static_cast<long>(total_count);
+        }
+
+        void set_dimension (
+            long size
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( size > 0,
+                "\t void running_covariance::set_dimension()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t size: " << size 
+                << "\n\t this: " << this
+                );
+
+            clear();
+            vect_size = size;
+            total_sum.set_size(size);
+            total_cov.set_size(size,size);
+            total_sum = 0;
+            total_cov = 0;
+        }
+
+        template <typename T>
+        typename disable_if<is_matrix<T> >::type add (
+            const T& val
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(((long)max_index_plus_one(val) <= in_vector_size() && in_vector_size() > 0),
+                "\t void running_covariance::add()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t max_index_plus_one(val): " << max_index_plus_one(val) 
+                << "\n\t in_vector_size():        " << in_vector_size() 
+                << "\n\t this:                    " << this
+                );
+
+            for (typename T::const_iterator i = val.begin(); i != val.end(); ++i)
+            {
+                total_sum(i->first) += i->second;
+                for (typename T::const_iterator j = val.begin(); j != val.end(); ++j)
+                {
+                    total_cov(i->first, j->first) += i->second*j->second;
+                }
+            }
+
+            ++total_count;
+        }
+
+        template <typename T>
+        typename enable_if<is_matrix<T> >::type add (
+            const T& val
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(val) && (in_vector_size() == 0 || val.size() == in_vector_size()),
+                "\t void running_covariance::add()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(val): " << is_col_vector(val) 
+                << "\n\t in_vector_size():   " << in_vector_size() 
+                << "\n\t val.size():         " << val.size() 
+                << "\n\t this:               " << this
+                );
+
+            vect_size = val.size();
+            if (total_count == 0)
+            {
+                total_cov = val*trans(val);
+                total_sum = val;
+            }
+            else
+            {
+                total_cov += val*trans(val);
+                total_sum += val;
+            }
+            ++total_count;
+        }
+
+        const column_matrix mean (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( in_vector_size() != 0,
+                "\t running_covariance::mean()"
+                << "\n\t This object can not execute this function in its current state."
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t current_n():      " << current_n() 
+                << "\n\t this:             " << this
+                );
+
+            return total_sum/total_count;
+        }
+
+        const general_matrix covariance (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( in_vector_size() != 0 && current_n() > 1,
+                "\t running_covariance::covariance()"
+                << "\n\t This object can not execute this function in its current state."
+                << "\n\t in_vector_size(): " << in_vector_size() 
+                << "\n\t current_n():      " << current_n() 
+                << "\n\t this:             " << this
+                );
+
+            return (total_cov - total_sum*trans(total_sum)/total_count)/(total_count-1);
+        }
+
+        const running_covariance operator+ (
+            const running_covariance& item
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()),
+                "\t running_covariance running_covariance::operator+()"
+                << "\n\t The two running_covariance objects being added must have compatible parameters"
+                << "\n\t in_vector_size():            " << in_vector_size() 
+                << "\n\t item.in_vector_size():       " << item.in_vector_size() 
+                << "\n\t this:                        " << this
+                );
+
+            running_covariance temp(item);
+
+            // make sure we ignore empty matrices
+            if (total_count != 0 && temp.total_count != 0)
+            {
+                temp.total_cov += total_cov;
+                temp.total_sum += total_sum;
+                temp.total_count += total_count;
+            }
+            else if (total_count != 0)
+            {
+                temp.total_cov = total_cov;
+                temp.total_sum = total_sum;
+                temp.total_count = total_count;
+            }
+
+            return temp;
+        }
+
+
+    private:
+
+        general_matrix total_cov;
+        column_matrix total_sum;
+        scalar_type total_count;
+
+        long vect_size;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class running_cross_covariance
+    {
+        /*!
+            INITIAL VALUE
+                - x_vect_size == 0
+                - y_vect_size == 0
+                - total_count == 0
+
+            CONVENTION
+                - x_vect_size == x_vector_size()
+                - y_vect_size == y_vector_size()
+                - total_count == current_n() 
+
+                - if (total_count != 0)
+                    - sum_x == the sum of all x vectors given to add()
+                    - sum_y == the sum of all y vectors given to add()
+                    - total_cov == sum of all x*trans(y) given to add()
+        !*/
+
+    public:
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        running_cross_covariance(
+        )
+        {
+            clear();
+        }
+
+        void clear(
+        )
+        {
+            total_count = 0;
+
+            x_vect_size = 0;
+            y_vect_size = 0;
+
+            sum_x.set_size(0);
+            sum_y.set_size(0);
+            total_cov.set_size(0,0);
+        }
+
+        long x_vector_size (
+        ) const
+        {
+            return x_vect_size;
+        }
+
+        long y_vector_size (
+        ) const
+        {
+            return y_vect_size;
+        }
+
+        void set_dimensions (
+            long x_size,
+            long y_size
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( x_size > 0 && y_size > 0,
+                "\t void running_cross_covariance::set_dimensions()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t x_size: " << x_size 
+                << "\n\t y_size: " << y_size 
+                << "\n\t this:   " << this
+                );
+
+            clear();
+            x_vect_size = x_size;
+            y_vect_size = y_size;
+            sum_x.set_size(x_size);
+            sum_y.set_size(y_size);
+            total_cov.set_size(x_size,y_size);
+
+            sum_x = 0;
+            sum_y = 0;
+            total_cov = 0;
+        }
+
+        long current_n (
+        ) const
+        {
+            return static_cast<long>(total_count);
+        }
+
+        template <typename T, typename U>
+        typename enable_if_c<!is_matrix<T>::value && !is_matrix<U>::value>::type add (
+            const T& x,
+            const U& y
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) &&
+                         ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) ,
+                "\t void running_cross_covariance::add()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) 
+                << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) 
+                << "\n\t x_vector_size():       " << x_vector_size() 
+                << "\n\t y_vector_size():       " << y_vector_size() 
+                << "\n\t this:                  " << this
+                );
+
+            for (typename T::const_iterator i = x.begin(); i != x.end(); ++i)
+            {
+                sum_x(i->first) += i->second;
+                for (typename U::const_iterator j = y.begin(); j != y.end(); ++j)
+                {
+                    total_cov(i->first, j->first) += i->second*j->second;
+                }
+            }
+
+            // do sum_y += y
+            for (typename U::const_iterator j = y.begin(); j != y.end(); ++j)
+            {
+                sum_y(j->first) += j->second;
+            }
+
+            ++total_count;
+        }
+
+        template <typename T, typename U>
+        typename enable_if_c<is_matrix<T>::value && !is_matrix<U>::value>::type add (
+            const T& x,
+            const U& y
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( (is_col_vector(x) && x.size() == x_vector_size() && x_vector_size() > 0) &&
+                         ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) ,
+                "\t void running_cross_covariance::add()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x):      " << is_col_vector(x) 
+                << "\n\t x.size():              " << x.size() 
+                << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) 
+                << "\n\t x_vector_size():       " << x_vector_size() 
+                << "\n\t y_vector_size():       " << y_vector_size() 
+                << "\n\t this:                  " << this
+                );
+
+            sum_x += x;
+
+            for (long i = 0; i < x.size(); ++i)
+            {
+                for (typename U::const_iterator j = y.begin(); j != y.end(); ++j)
+                {
+                    total_cov(i, j->first) += x(i)*j->second;
+                }
+            }
+
+            // do sum_y += y
+            for (typename U::const_iterator j = y.begin(); j != y.end(); ++j)
+            {
+                sum_y(j->first) += j->second;
+            }
+
+            ++total_count;
+        }
+
+        template <typename T, typename U>
+        typename enable_if_c<!is_matrix<T>::value && is_matrix<U>::value>::type add (
+            const T& x,
+            const U& y
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) &&
+                         (is_col_vector(y) && y.size() == (long)y_vector_size() && y_vector_size() > 0) ,
+                "\t void running_cross_covariance::add()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) 
+                << "\n\t is_col_vector(y):      " << is_col_vector(y) 
+                << "\n\t y.size():              " << y.size() 
+                << "\n\t x_vector_size():       " << x_vector_size() 
+                << "\n\t y_vector_size():       " << y_vector_size() 
+                << "\n\t this:                  " << this
+                );
+
+            for (typename T::const_iterator i = x.begin(); i != x.end(); ++i)
+            {
+                sum_x(i->first) += i->second;
+                for (long j = 0; j < y.size(); ++j)
+                {
+                    total_cov(i->first, j) += i->second*y(j);
+                }
+            }
+
+            sum_y += y;
+
+            ++total_count;
+        }
+
+        template <typename T, typename U>
+        typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value>::type add (
+            const T& x,
+            const U& y
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_col_vector(x) && (x_vector_size() == 0 || x.size() == x_vector_size()) &&
+                        is_col_vector(y) && (y_vector_size() == 0 || y.size() == y_vector_size()) &&
+                        x.size() != 0 &&
+                        y.size() != 0,
+                "\t void running_cross_covariance::add()"
+                << "\n\t Invalid inputs were given to this function"
+                << "\n\t is_col_vector(x): " << is_col_vector(x) 
+                << "\n\t x_vector_size():  " << x_vector_size() 
+                << "\n\t x.size():         " << x.size() 
+                << "\n\t is_col_vector(y): " << is_col_vector(y) 
+                << "\n\t y_vector_size():  " << y_vector_size() 
+                << "\n\t y.size():         " << y.size() 
+                << "\n\t this:             " << this
+                );
+
+            x_vect_size = x.size();
+            y_vect_size = y.size();
+            if (total_count == 0)
+            {
+                total_cov = x*trans(y);
+                sum_x = x;
+                sum_y = y;
+            }
+            else
+            {
+                total_cov += x*trans(y);
+                sum_x += x;
+                sum_y += y;
+            }
+            ++total_count;
+        }
+
+        const column_matrix mean_x (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( current_n() != 0,
+                "\t running_cross_covariance::mean()"
+                << "\n\t This object can not execute this function in its current state."
+                << "\n\t current_n():      " << current_n() 
+                << "\n\t this:             " << this
+                );
+
+            return sum_x/total_count;
+        }
+
+        const column_matrix mean_y (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( current_n() != 0,
+                "\t running_cross_covariance::mean()"
+                << "\n\t This object can not execute this function in its current state."
+                << "\n\t current_n():      " << current_n() 
+                << "\n\t this:             " << this
+                );
+
+            return sum_y/total_count;
+        }
+
+        const general_matrix covariance_xy (
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT( current_n() > 1,
+                "\t running_cross_covariance::covariance()"
+                << "\n\t This object can not execute this function in its current state."
+                << "\n\t x_vector_size(): " << x_vector_size() 
+                << "\n\t y_vector_size(): " << y_vector_size() 
+                << "\n\t current_n():     " << current_n() 
+                << "\n\t this:            " << this
+                );
+
+            return (total_cov - sum_x*trans(sum_y)/total_count)/(total_count-1);
+        }
+
+        const running_cross_covariance operator+ (
+            const running_cross_covariance& item
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT((x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size()) &&
+                        (y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size()),
+                "\t running_cross_covariance running_cross_covariance::operator+()"
+                << "\n\t The two running_cross_covariance objects being added must have compatible parameters"
+                << "\n\t x_vector_size():            " << x_vector_size() 
+                << "\n\t item.x_vector_size():       " << item.x_vector_size() 
+                << "\n\t y_vector_size():            " << y_vector_size() 
+                << "\n\t item.y_vector_size():       " << item.y_vector_size() 
+                << "\n\t this:                       " << this
+                );
+
+            running_cross_covariance temp(item);
+
+            // make sure we ignore empty matrices
+            if (total_count != 0 && temp.total_count != 0)
+            {
+                temp.total_cov += total_cov;
+                temp.sum_x += sum_x;
+                temp.sum_y += sum_y;
+                temp.total_count += total_count;
+            }
+            else if (total_count != 0)
+            {
+                temp.total_cov = total_cov;
+                temp.sum_x = sum_x;
+                temp.sum_y = sum_y;
+                temp.total_count = total_count;
+            }
+
+            return temp;
+        }
+
+
+    private:
+
+        general_matrix total_cov;
+        column_matrix sum_x;
+        column_matrix sum_y;
+        scalar_type total_count;
+
+        long x_vect_size;
+        long y_vect_size;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class vector_normalizer
+    {
+    public:
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef matrix_type result_type;
+
+        template <typename vector_type>
+        void train (
+            const vector_type& samples
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(samples.size() > 0,
+                "\tvoid vector_normalizer::train()"
+                << "\n\tyou have to give a nonempty set of samples to this function"
+                << "\n\tthis: " << this
+                );
+
+            m = mean(mat(samples));
+            sd = reciprocal(sqrt(variance(mat(samples))));
+
+            DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer::train() have infinite or NaN values");
+        }
+
+        long in_vector_size (
+        ) const
+        {
+            return m.nr();
+        }
+
+        long out_vector_size (
+        ) const
+        {
+            return m.nr();
+        }
+
+        const matrix_type& means (
+        ) const
+        {
+            return m;
+        }
+
+        const matrix_type& std_devs (
+        ) const
+        {
+            return sd;
+        }
+
+        const result_type& operator() (
+            const matrix_type& x
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1,
+                "\tmatrix vector_normalizer::operator()"
+                << "\n\t you have given invalid arguments to this function"
+                << "\n\t x.nr():           " << x.nr()
+                << "\n\t in_vector_size(): " << in_vector_size()
+                << "\n\t x.nc():           " << x.nc()
+                << "\n\t this:             " << this
+                );
+
+            temp_out = pointwise_multiply(x-m, sd);
+            return temp_out;
+        }
+
+        void swap (
+            vector_normalizer& item
+        )
+        {
+            m.swap(item.m);
+            sd.swap(item.sd);
+            temp_out.swap(item.temp_out);
+        }
+
+        template <typename mt>
+        friend void deserialize (
+            vector_normalizer<mt>& item, 
+            std::istream& in
+        ); 
+
+        template <typename mt>
+        friend void serialize (
+            const vector_normalizer<mt>& item, 
+            std::ostream& out 
+        );
+
+    private:
+
+        // ------------------- private data members -------------------
+
+        matrix_type m, sd;
+
+        // This is just a temporary variable that doesn't contribute to the
+        // state of this object.
+        mutable matrix_type temp_out;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    inline void swap (
+        vector_normalizer<matrix_type>& a, 
+        vector_normalizer<matrix_type>& b 
+    ) { a.swap(b); }   
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void deserialize (
+        vector_normalizer<matrix_type>& item, 
+        std::istream& in
+    )   
+    {
+        deserialize(item.m, in);
+        deserialize(item.sd, in);
+        // Keep deserializing the pca matrix for backwards compatibility.
+        matrix<double> pca;
+        deserialize(pca, in);
+
+        if (pca.size() != 0)
+            throw serialization_error("Error deserializing object of type vector_normalizer\n"   
+                                        "It looks like a serialized vector_normalizer_pca was accidentally deserialized into \n"
+                                        "a vector_normalizer object.");
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void serialize (
+        const vector_normalizer<matrix_type>& item, 
+        std::ostream& out 
+    )
+    {
+        serialize(item.m, out);
+        serialize(item.sd, out);
+        // Keep serializing the pca matrix for backwards compatibility.
+        matrix<double> pca;
+        serialize(pca, out);
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class vector_normalizer_pca
+    {
+    public:
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef matrix<scalar_type,0,1,mem_manager_type> result_type;
+
+        template <typename vector_type>
+        void train (
+            const vector_type& samples,
+            const double eps = 0.99
+        )
+        {
+            // You are getting an error here because you are trying to apply PCA
+            // to a vector of fixed length.  But PCA is going to try and do 
+            // dimensionality reduction so you can't use a vector with a fixed dimension.
+            COMPILE_TIME_ASSERT(matrix_type::NR == 0);
+
+            // make sure requires clause is not broken
+            DLIB_ASSERT(samples.size() > 0,
+                "\tvoid vector_normalizer_pca::train_pca()"
+                << "\n\tyou have to give a nonempty set of samples to this function"
+                << "\n\tthis: " << this
+                );
+            DLIB_ASSERT(0 < eps && eps <= 1,
+                "\tvoid vector_normalizer_pca::train_pca()"
+                << "\n\tyou have to give a nonempty set of samples to this function"
+                << "\n\tthis: " << this
+                );
+            train_pca_impl(mat(samples),eps);
+
+            DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_pca::train() have infinite or NaN values");
+        }
+
+        long in_vector_size (
+        ) const
+        {
+            return m.nr();
+        }
+
+        long out_vector_size (
+        ) const
+        {
+            return pca.nr();
+        }
+
+        const matrix<scalar_type,0,1,mem_manager_type>& means (
+        ) const
+        {
+            return m;
+        }
+
+        const matrix<scalar_type,0,1,mem_manager_type>& std_devs (
+        ) const
+        {
+            return sd;
+        }
+
+        const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix (
+        ) const
+        {
+            return pca;
+        }
+
+        const result_type& operator() (
+            const matrix_type& x
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1,
+                "\tmatrix vector_normalizer_pca::operator()"
+                << "\n\t you have given invalid arguments to this function"
+                << "\n\t x.nr():           " << x.nr()
+                << "\n\t in_vector_size(): " << in_vector_size()
+                << "\n\t x.nc():           " << x.nc()
+                << "\n\t this:             " << this
+                );
+
+            // If we have a pca transform matrix on hand then
+            // also apply that.
+            temp_out = pca*pointwise_multiply(x-m, sd);
+
+            return temp_out;
+        }
+
+        void swap (
+            vector_normalizer_pca& item
+        )
+        {
+            m.swap(item.m);
+            sd.swap(item.sd);
+            pca.swap(item.pca);
+            temp_out.swap(item.temp_out);
+        }
+
+        template <typename T>
+        friend void deserialize (
+            vector_normalizer_pca<T>& item, 
+            std::istream& in
+        );
+
+        template <typename T>
+        friend void serialize (
+            const vector_normalizer_pca<T>& item, 
+            std::ostream& out 
+        );
+
+    private:
+
+        template <typename mat_type>
+        void train_pca_impl (
+            const mat_type& samples,
+            const double eps 
+        )
+        {
+            m = mean(samples);
+            sd = reciprocal(sqrt(variance(samples)));
+
+            // fill x with the normalized version of the input samples
+            matrix<typename mat_type::type,0,1,mem_manager_type> x(samples);
+            for (long r = 0; r < x.size(); ++r)
+                x(r) = pointwise_multiply(x(r)-m, sd);
+
+            matrix<scalar_type,0,0,mem_manager_type> temp, eigen;
+            matrix<scalar_type,0,1,mem_manager_type> eigenvalues;
+
+            // Compute the svd of the covariance matrix of the normalized inputs
+            svd(covariance(x), temp, eigen, pca);
+            eigenvalues = diag(eigen);
+
+            rsort_columns(pca, eigenvalues);
+
+            // figure out how many eigenvectors we want in our pca matrix
+            const double thresh = sum(eigenvalues)*eps;
+            long num_vectors = 0;
+            double total = 0;
+            for (long r = 0; r < eigenvalues.size() && total < thresh; ++r)
+            {
+                ++num_vectors;
+                total += eigenvalues(r);
+            }
+
+            // So now we know we want to use num_vectors of the first eigenvectors.  So
+            // pull those out and discard the rest.
+            pca = trans(colm(pca,range(0,num_vectors-1)));
+
+            // Apply the pca transform to the data in x.  Then we will normalize the
+            // pca matrix below.
+            for (long r = 0; r < x.nr(); ++r)
+            {
+                x(r) = pca*x(r);
+            }
+
+            // Here we just scale the output features from the pca transform so 
+            // that the variance of each feature is 1.  So this doesn't really change
+            // what the pca is doing, it just makes sure the output features are
+            // normalized.
+            pca = trans(scale_columns(trans(pca), reciprocal(sqrt(variance(x)))));
+        }
+
+
+        // ------------------- private data members -------------------
+
+        matrix<scalar_type,0,1,mem_manager_type> m, sd;
+        matrix<scalar_type,0,0,mem_manager_type> pca;
+
+        // This is just a temporary variable that doesn't contribute to the
+        // state of this object.
+        mutable result_type temp_out;
+    };
+
+    template <
+        typename matrix_type
+        >
+    inline void swap (
+        vector_normalizer_pca<matrix_type>& a, 
+        vector_normalizer_pca<matrix_type>& b 
+    ) { a.swap(b); }   
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void deserialize (
+        vector_normalizer_pca<matrix_type>& item, 
+        std::istream& in
+    )   
+    {
+        deserialize(item.m, in);
+        deserialize(item.sd, in);
+        deserialize(item.pca, in);
+        if (item.pca.nc() != item.m.nr())
+            throw serialization_error("Error deserializing object of type vector_normalizer_pca\n"   
+                                        "It looks like a serialized vector_normalizer was accidentally deserialized into \n"
+                                        "a vector_normalizer_pca object.");
+    }
+
+    template <
+        typename matrix_type
+        >
+    void serialize (
+        const vector_normalizer_pca<matrix_type>& item, 
+        std::ostream& out 
+    )
+    {
+        serialize(item.m, out);
+        serialize(item.sd, out);
+        serialize(item.pca, out);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline double binomial_random_vars_are_different (
+        uint64_t k1,
+        uint64_t n1,
+        uint64_t k2,
+        uint64_t n2
+    )
+    {
+        DLIB_ASSERT(k1 <= n1, "k1: "<< k1 << "  n1: "<< n1);
+        DLIB_ASSERT(k2 <= n2, "k2: "<< k2 << "  n2: "<< n2);
+
+        const double p1 = k1/(double)n1;
+        const double p2 = k2/(double)n2;
+        const double p = (k1+k2)/(double)(n1+n2);
+
+        auto ll = [](double p, uint64_t k, uint64_t n) {      
+            if (p == 0 || p == 1)
+                return 0.0;
+            return k*std::log(p) + (n-k)*std::log(1-p);
+        };
+
+        auto logll = ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); 
+
+        // The basic statistic only tells you if the random variables are different.  But
+        // it's nice to know which way they are different, i.e., which one is bigger.  So
+        // stuff that information into the sign bit of the return value.
+        if (p1>=p2)
+            return logll;
+        else
+            return -logll;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    inline double event_correlation (
+        uint64_t A_count,
+        uint64_t B_count,
+        uint64_t AB_count,
+        uint64_t total_num_observations
+    )
+    {
+        DLIB_ASSERT(AB_count <= A_count && A_count <= total_num_observations,
+            "AB_count: " << AB_count << ", A_count: "<< A_count << ", total_num_observations: " << total_num_observations);
+        DLIB_ASSERT(AB_count <= B_count && B_count <= total_num_observations,
+            "AB_count: " << AB_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations);
+
+        if (total_num_observations == 0)
+            return 0;
+
+        DLIB_ASSERT(A_count + B_count - AB_count <= total_num_observations,
+            "AB_count: " << AB_count << " A_count: " << A_count <<  ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations);
+
+
+        const auto AnotB_count = A_count - AB_count;
+        const auto notB_count = total_num_observations - B_count;
+        // How likely is it that the odds of A happening is different when conditioned on
+        // whether or not B happened?
+        return binomial_random_vars_are_different( 
+            AB_count, B_count,      // A conditional on the presence of B
+            AnotB_count, notB_count // A conditional on the absence of B 
+        );
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_STATISTICs_
+
diff --git a/ml/dlib/dlib/statistics/statistics_abstract.h b/ml/dlib/dlib/statistics/statistics_abstract.h
new file mode 100644
index 000000000..ef8f13802
--- /dev/null
+++ b/ml/dlib/dlib/statistics/statistics_abstract.h
@@ -0,0 +1,1387 @@
+// Copyright (C) 2008  Davis E. King (davis@dlib.net), Steve Taylor
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_STATISTICs_ABSTRACT_
+#ifdef DLIB_STATISTICs_ABSTRACT_
+
+#include <limits>
+#include <cmath>
+#include "../matrix/matrix_abstract.h"
+#include "../svm/sparse_vector_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double mean_sign_agreement (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    );
+    /*!
+        requires
+            - a.size() == b.size()
+        ensures
+            - returns the number of times a[i] has the same sign as b[i] divided by
+              a.size().  So we return the probability that elements of a and b have
+              the same sign.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double correlation (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    );
+    /*!
+        requires
+            - a.size() == b.size()
+            - a.size() > 1
+        ensures
+            - returns the correlation coefficient between all the elements of a and b.
+              (i.e. how correlated is a(i) with b(i))
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double covariance (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    );
+    /*!
+        requires
+            - a.size() == b.size()
+            - a.size() > 1
+        ensures
+            - returns the covariance between all the elements of a and b.
+              (i.e. how does a(i) vary with b(i))
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double r_squared (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    );
+    /*!
+        requires
+            - a.size() == b.size()
+            - a.size() > 1
+        ensures
+            - returns the R^2 coefficient of determination between all the elements of a and b.
+              This value is just the square of correlation(a,b).
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T, 
+        typename alloc
+        >
+    double mean_squared_error (
+        const std::vector<T,alloc>& a,
+        const std::vector<T,alloc>& b
+    );
+    /*!
+        requires
+            - a.size() == b.size()
+        ensures
+            - returns the mean squared error between all the elements of a and b.
+              (i.e. mean(squared(mat(a)-mat(b))))
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    double binomial_random_vars_are_different (
+        uint64_t k1,
+        uint64_t n1,
+        uint64_t k2,
+        uint64_t n2
+    );
+    /*!
+        requires
+            - k1 <= n1
+            - k2 <= n2
+        ensures
+            - Given two binomially distributed random variables, X1 and X2, we want to know
+              if these variables have the same parameter (i.e. the chance of "success").
+              So assume that:
+                - You observed X1 to give k1 successes out of n1 trials.
+                - You observed X2 to give k2 successes out of n2 trials.
+            - This function performs a simple likelihood ratio test to determine if X1 and
+              X2 have the same parameter.  The return value of this function will be:
+                - Close to 0 if they are probably the same.
+                - Larger than 0 if X1 probably has a higher "success" rate than X2. 
+                - Smaller than 0 if X2 probably has a higher "success" rate than X1. 
+              Moreover, the larger the absolute magnitude of the return value the more
+              likely it is that X1 and X2 have different distributions.
+            - For a discussion of the technique and applications see:
+                  Dunning, Ted. "Accurate methods for the statistics of surprise and
+                  coincidence." Computational linguistics 19.1 (1993): 61-74.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    double event_correlation (
+        uint64_t A_count,
+        uint64_t B_count,
+        uint64_t AB_count,
+        uint64_t total_num_observations
+    );
+    /*!
+        requires
+            - AB_count <= A_count <= total_num_observations
+            - AB_count <= B_count <= total_num_observations
+            - A_count + B_count - AB_count <= total_num_observations
+        ensures
+            - This function does a statistical test to determine if two events co-occur in
+              a statistically significant way.  In particular, we assume you performed
+              total_num_observations measurements and during those measurements you:
+                - Observed event A to happen A_count times.
+                - Observed event B to happen B_count times.
+                - Observed AB_count co-occurrences of the events.  That is, AB_count is the
+                  number of times the events happened together during the same measurement.
+            - This function returns a number, COR, which can take any real value.  It has
+              the following interpretations:
+                - COR == 0: there is no evidence of correlation between the two events.
+                  They appear to be unrelated.
+                - COR > 0: There is evidence that A and B co-occur together.  That is,
+                  they happen at the same times more often than you would expect if they
+                  were independent events.  The larger the magnitude of COR the more
+                  evidence we have for the correlation.
+                - COR < 0: There is evidence that A and B are anti-correlated.  That is,
+                  when A happens B is unlikely to happen and vise versa.  The larger the
+                  magnitude of COR the more evidence we have for the anti-correlation.
+            - This function implements the simple likelihood ratio test discussed in the
+              following paper:
+                  Dunning, Ted. "Accurate methods for the statistics of surprise and
+                  coincidence." Computational linguistics 19.1 (1993): 61-74.
+              So for an extended discussion of the method see the above paper.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_stats
+    {
+        /*!
+            REQUIREMENTS ON T
+                - T must be a float, double, or long double type
+
+            INITIAL VALUE
+                - mean() == 0
+                - current_n() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents something that can compute the running mean, 
+                variance, skewness, and excess kurtosis of a stream of real numbers.  
+        !*/
+    public:
+
+        running_stats(
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        void clear(
+        );
+        /*!
+            ensures
+                - this object has its initial value
+                - clears all memory of any previous data points
+        !*/
+
+        T current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the number of points given to this object so far. 
+        !*/
+
+        void add (
+            const T& val
+        );
+        /*!
+            ensures
+                - updates the mean, variance, skewness, and kurtosis stored in this object
+                  so that the new value is factored into them.
+                - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1).
+                  (i.e. the updated mean value that takes the new value into account)
+                - #variance() == the updated variance that takes this new value into account.
+                - #skewness() == the updated skewness that takes this new value into account.
+                - #ex_kurtosis() == the updated kurtosis that takes this new value into account.
+                - #current_n() == current_n() + 1
+        !*/
+
+        T mean (
+        ) const;
+        /*!
+            ensures
+                - returns the mean of all the values presented to this object 
+                  so far.
+        !*/
+
+        T variance (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample variance of all the values presented to this
+                  object so far.
+        !*/
+
+        T stddev (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sampled standard deviation of all the values
+                  presented to this object so far.
+        !*/
+
+        T skewness (
+        ) const;
+        /*!
+            requires
+                - current_n() > 2
+            ensures
+                - returns the unbiased sample skewness of all the values presented 
+                  to this object so far.
+        !*/
+
+        T ex_kurtosis(
+        ) const;
+        /*!
+            requires
+                - current_n() > 3
+            ensures
+                - returns the unbiased sample kurtosis of all the values presented 
+                  to this object so far.
+        !*/
+
+        T max (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the largest value presented to this object so far.
+        !*/
+
+        T min (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the smallest value presented to this object so far.
+        !*/
+
+        T scale (
+            const T& val
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - return (val-mean())/stddev();
+        !*/
+
+        running_stats operator+ (
+            const running_stats& rhs
+        ) const;
+        /*!
+            ensures
+                - returns a new running_stats object that represents the combination of all
+                  the values given to *this and rhs.  That is, this function returns a
+                  running_stats object, R, that is equivalent to what you would obtain if
+                  all calls to this->add() and rhs.add() had instead been done to R.
+        !*/
+    };
+
+    template <typename T>
+    void serialize (
+        const running_stats<T>& item, 
+        std::ostream& out 
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+    template <typename T>
+    void deserialize (
+        running_stats<T>& item, 
+        std::istream& in
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_scalar_covariance
+    {
+        /*!
+            REQUIREMENTS ON T
+                - T must be a float, double, or long double type
+
+            INITIAL VALUE
+                - mean_x() == 0
+                - mean_y() == 0
+                - current_n() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents something that can compute the running covariance 
+                of a stream of real number pairs.
+        !*/
+
+    public:
+
+        running_scalar_covariance(
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        void clear(
+        );
+        /*!
+            ensures
+                - this object has its initial value
+                - clears all memory of any previous data points
+        !*/
+
+        void add (
+            const T& x,
+            const T& y
+        );
+        /*!
+            ensures
+                - updates the statistics stored in this object so that
+                  the new pair (x,y) is factored into them.
+                - #current_n() == current_n() + 1
+        !*/
+
+        T current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the number of points given to this object so far. 
+        !*/
+
+        T mean_x (
+        ) const;
+        /*!
+            ensures
+                - returns the mean value of all x samples presented to this object
+                  via add().
+        !*/
+
+        T mean_y (
+        ) const;
+        /*!
+            ensures
+                - returns the mean value of all y samples presented to this object
+                  via add().
+        !*/
+
+        T covariance (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the covariance between all the x and y samples presented
+                  to this object via add()
+        !*/
+
+        T correlation (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the correlation coefficient between all the x and y samples 
+                  presented to this object via add()
+        !*/
+
+        T variance_x (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample variance value of all x samples presented 
+                  to this object via add().
+        !*/
+
+        T variance_y (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample variance value of all y samples presented 
+                  to this object via add().
+        !*/
+
+        T stddev_x (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample standard deviation of all x samples
+                  presented to this object via add().
+        !*/
+
+        T stddev_y (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample standard deviation of all y samples
+                  presented to this object via add().
+        !*/
+
+        running_scalar_covariance operator+ (
+            const running_covariance& rhs
+        ) const;
+        /*!
+            ensures
+                - returns a new running_scalar_covariance object that represents the
+                  combination of all the values given to *this and rhs.  That is, this
+                  function returns a running_scalar_covariance object, R, that is
+                  equivalent to what you would obtain if all calls to this->add() and
+                  rhs.add() had instead been done to R.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_scalar_covariance_decayed
+    {
+        /*!
+            REQUIREMENTS ON T
+                - T must be a float, double, or long double type
+
+            INITIAL VALUE
+                - mean_x() == 0
+                - mean_y() == 0
+                - current_n() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents something that can compute the running covariance of
+                a stream of real number pairs.  It is essentially the same as
+                running_scalar_covariance except that it forgets about data it has seen
+                after a certain period of time.  It does this by exponentially decaying old
+                statistics. 
+        !*/
+
+    public:
+
+        running_scalar_covariance_decayed(
+            T decay_halflife = 1000 
+        );
+        /*!
+            requires
+                - decay_halflife > 0
+            ensures
+                - #forget_factor() == std::pow(0.5, 1/decay_halflife);
+                  (i.e. after decay_halflife calls to add() the data given to the first add
+                  will be down weighted by 0.5 in the statistics stored in this object). 
+        !*/
+
+        T forget_factor (
+        ) const;
+        /*!
+            ensures
+                - returns the exponential forget factor used to forget old statistics when
+                  add() is called.
+        !*/
+
+        void add (
+            const T& x,
+            const T& y
+        );
+        /*!
+            ensures
+                - updates the statistics stored in this object so that
+                  the new pair (x,y) is factored into them.
+                - #current_n() == current_n()*forget_factor() + forget_factor()
+                - Down weights old statistics by a factor of forget_factor().
+        !*/
+
+        T current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the effective number of points given to this object.   As add()
+                  is called this value will converge to a constant, the value of which is
+                  based on the decay_halflife supplied to the constructor.
+        !*/
+
+        T mean_x (
+        ) const;
+        /*!
+            ensures
+                - returns the mean value of all x samples presented to this object
+                  via add().
+        !*/
+
+        T mean_y (
+        ) const;
+        /*!
+            ensures
+                - returns the mean value of all y samples presented to this object
+                  via add().
+        !*/
+
+        T covariance (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the covariance between all the x and y samples presented
+                  to this object via add()
+        !*/
+
+        T correlation (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the correlation coefficient between all the x and y samples 
+                  presented to this object via add()
+        !*/
+
+        T variance_x (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the sample variance value of all x samples presented 
+                  to this object via add().
+        !*/
+
+        T variance_y (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the sample variance value of all y samples presented 
+                  to this object via add().
+        !*/
+
+        T stddev_x (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the sample standard deviation of all x samples
+                  presented to this object via add().
+        !*/
+
+        T stddev_y (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the sample standard deviation of all y samples
+                  presented to this object via add().
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T
+        >
+    class running_stats_decayed
+    {
+        /*!
+            REQUIREMENTS ON T
+                - T must be a float, double, or long double type
+
+            INITIAL VALUE
+                - mean() == 0
+                - current_n() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents something that can compute the running mean and
+                variance of a stream of real numbers.  It is similar to running_stats
+                except that it forgets about data it has seen after a certain period of
+                time.  It does this by exponentially decaying old statistics. 
+        !*/
+
+    public:
+
+        running_stats_decayed(
+            T decay_halflife = 1000 
+        );
+        /*!
+            requires
+                - decay_halflife > 0
+            ensures
+                - #forget_factor() == std::pow(0.5, 1/decay_halflife);
+                  (i.e. after decay_halflife calls to add() the data given to the first add
+                  will be down weighted by 0.5 in the statistics stored in this object). 
+        !*/
+
+        T forget_factor (
+        ) const;
+        /*!
+            ensures
+                - returns the exponential forget factor used to forget old statistics when
+                  add() is called.
+        !*/
+
+        void add (
+            const T& x
+        );
+        /*!
+            ensures
+                - updates the statistics stored in this object so that x is factored into
+                  them.
+                - #current_n() == current_n()*forget_factor() + forget_factor()
+                - Down weights old statistics by a factor of forget_factor().
+        !*/
+
+        T current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the effective number of points given to this object.   As add()
+                  is called this value will converge to a constant, the value of which is
+                  based on the decay_halflife supplied to the constructor.
+        !*/
+
+        T mean (
+        ) const;
+        /*!
+            ensures
+                - returns the mean value of all x samples presented to this object
+                  via add().
+        !*/
+
+        T variance (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the sample variance value of all x samples presented to this
+                  object via add().
+        !*/
+
+        T stddev (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the sample standard deviation of all x samples presented to this
+                  object via add().
+        !*/
+
+    };
+
+    template <typename T>
+    void serialize (
+        const running_stats_decayed<T>& item, 
+        std::ostream& out 
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+    template <typename T>
+    void deserialize (
+        running_stats_decayed<T>& item, 
+        std::istream& in
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class running_covariance
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                Must be some type of dlib::matrix.
+
+            INITIAL VALUE
+                - in_vector_size() == 0
+                - current_n() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a simple tool for computing the mean and
+                covariance of a sequence of vectors.  
+        !*/
+    public:
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        running_covariance(
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        void clear(
+        );
+        /*!
+            ensures
+                - this object has its initial value
+                - clears all memory of any previous data points
+        !*/
+
+        long current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the number of samples that have been presented to this object
+        !*/
+
+        long in_vector_size (
+        ) const;
+        /*!
+            ensures
+                - if (this object has been presented with any input vectors or
+                  set_dimension() has been called) then
+                    - returns the dimension of the column vectors used with this object
+                - else
+                    - returns 0
+        !*/
+
+        void set_dimension (
+            long size
+        );
+        /*!
+            requires
+                - size > 0
+            ensures
+                - #in_vector_size() == size
+                - #current_n() == 0
+        !*/
+
+        template <typename T>
+        void add (
+            const T& val
+        );
+        /*!
+            requires
+                - val must represent a column vector.  It can either be a dlib::matrix
+                  object or some kind of unsorted sparse vector type.  See the top of
+                  dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector.
+                - val must have a number of dimensions which is compatible with the current
+                  setting of in_vector_size().  In particular, this means that the
+                  following must hold:
+                    - if (val is a dlib::matrix) then 
+                        - in_vector_size() == 0 || val.size() == val_vector_size()
+                    - else
+                        - max_index_plus_one(val) <= in_vector_size()
+                        - in_vector_size() > 0 
+                          (i.e. you must call set_dimension() prior to calling add() if
+                          you want to use sparse vectors.)
+            ensures
+                - updates the mean and covariance stored in this object so that
+                  the new value is factored into them.
+                - if (val is a dlib::matrix) then
+                    - #in_vector_size() == val.size()
+        !*/
+
+        const column_matrix mean (
+        ) const;
+        /*!
+            requires
+                - in_vector_size() != 0 
+            ensures
+                - returns the mean of all the vectors presented to this object 
+                  so far.
+        !*/
+
+        const general_matrix covariance (
+        ) const;
+        /*!
+            requires
+                - in_vector_size() != 0 
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample covariance matrix for all the vectors 
+                  presented to this object so far.
+        !*/
+
+        const running_covariance operator+ (
+            const running_covariance& item
+        ) const;
+        /*!
+            requires
+                - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()
+                  (i.e. the in_vector_size() of *this and item must match or one must be zero)
+            ensures
+                - returns a new running_covariance object that represents the combination of all 
+                  the vectors given to *this and item.  That is, this function returns a
+                  running_covariance object, R, that is equivalent to what you would obtain if all
+                  calls to this->add() and item.add() had instead been done to R.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class running_cross_covariance
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                Must be some type of dlib::matrix.
+
+            INITIAL VALUE
+                - x_vector_size() == 0
+                - y_vector_size() == 0
+                - current_n() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a simple tool for computing the mean and cross-covariance
+                matrices of a sequence of pairs of vectors.  
+        !*/
+
+    public:
+
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef typename matrix_type::layout_type layout_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
+        typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
+
+        running_cross_covariance(
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        void clear(
+        );
+        /*!
+            ensures
+                - This object has its initial value.
+                - Clears all memory of any previous data points.
+        !*/
+
+        long x_vector_size (
+        ) const;
+        /*!
+            ensures
+                - if (this object has been presented with any input vectors or
+                  set_dimensions() has been called) then
+                    - returns the dimension of the x vectors given to this object via add().
+                - else
+                    - returns 0
+        !*/
+
+        long y_vector_size (
+        ) const;
+        /*!
+            ensures
+                - if (this object has been presented with any input vectors or
+                  set_dimensions() has been called) then
+                    - returns the dimension of the y vectors given to this object via add().
+                - else
+                    - returns 0
+        !*/
+
+        void set_dimensions (
+            long x_size,
+            long y_size
+        );
+        /*!
+            requires
+                - x_size > 0
+                - y_size > 0
+            ensures
+                - #x_vector_size() == x_size
+                - #y_vector_size() == y_size
+                - #current_n() == 0
+        !*/
+
+        long current_n (
+        ) const;
+        /*!
+            ensures
+                - returns the number of samples that have been presented to this object.
+        !*/
+
+        template <typename T, typename U>
+        void add (
+            const T& x,
+            const U& y
+        );
+        /*!
+            requires
+                - x and y must represent column vectors.  They can either be dlib::matrix
+                  objects or some kind of unsorted sparse vector type.  See the top of
+                  dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector.
+                - x and y must have a number of dimensions which is compatible with the
+                  current setting of x_vector_size() and y_vector_size().  In particular,
+                  this means that the following must hold:
+                    - if (x or y is a sparse vector type) then
+                        - x_vector_size() > 0 && y_vector_size() > 0
+                          (i.e. you must call set_dimensions() prior to calling add() if
+                          you want to use sparse vectors.)
+                    - if (x is a dlib::matrix) then 
+                        - x_vector_size() == 0 || x.size() == x_vector_size()
+                    - else
+                        - max_index_plus_one(x) <= x_vector_size()
+                    - if (y is a dlib::matrix) then 
+                        - y_vector_size() == 0 || y.size() == y_vector_size()
+                    - else
+                        - max_index_plus_one(y) <= y_vector_size()
+            ensures
+                - updates the mean and cross-covariance matrices stored in this object so
+                  that the new (x,y) vector pair is factored into them.
+                - if (x is a dlib::matrix) then
+                    - #x_vector_size() == x.size()
+                - if (y is a dlib::matrix) then
+                    - #y_vector_size() == y.size()
+        !*/
+
+        const column_matrix mean_x (
+        ) const;
+        /*!
+            requires
+                - current_n() != 0 
+            ensures
+                - returns the mean of all the x vectors presented to this object so far.
+                - The returned vector will have x_vector_size() dimensions.
+        !*/
+
+        const column_matrix mean_y (
+        ) const;
+        /*!
+            requires
+                - current_n() != 0 
+            ensures
+                - returns the mean of all the y vectors presented to this object so far.
+                - The returned vector will have y_vector_size() dimensions.
+        !*/
+
+        const general_matrix covariance_xy (
+        ) const;
+        /*!
+            requires
+                - current_n() > 1
+            ensures
+                - returns the unbiased sample cross-covariance matrix for all the vector
+                  pairs presented to this object so far.  In particular, returns a matrix
+                  M such that:
+                    - M.nr() == x_vector_size()
+                    - M.nc() == y_vector_size()
+                    - M == the cross-covariance matrix of the data given to add().
+        !*/
+
+        const running_cross_covariance operator+ (
+            const running_cross_covariance& item
+        ) const;
+        /*!
+            requires
+                - x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size()
+                  (i.e. the x_vector_size() of *this and item must match or one must be zero)
+                - y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size()
+                  (i.e. the y_vector_size() of *this and item must match or one must be zero)
+            ensures
+                - returns a new running_cross_covariance object that represents the
+                  combination of all the vectors given to *this and item.  That is, this
+                  function returns a running_cross_covariance object, R, that is equivalent
+                  to what you would obtain if all calls to this->add() and item.add() had
+                  instead been done to R.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class vector_normalizer
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                - must be a dlib::matrix object capable of representing column 
+                  vectors
+
+            INITIAL VALUE
+                - in_vector_size() == 0
+                - out_vector_size() == 0
+                - means().size() == 0
+                - std_devs().size() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents something that can learn to normalize a set 
+                of column vectors.  In particular, normalized column vectors should 
+                have zero mean and a variance of one.  
+
+                Also, if desired, this object can use principal component 
+                analysis for the purposes of reducing the number of elements in a 
+                vector.  
+
+            THREAD SAFETY
+                Note that this object contains a cached matrix object it uses 
+                to store intermediate results for normalization.  This avoids
+                needing to reallocate it every time this object performs normalization
+                but also makes it non-thread safe.  So make sure you don't share
+                instances of this object between threads. 
+        !*/
+
+    public:
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef matrix_type result_type;
+
+        template <typename vector_type>
+        void train (
+            const vector_type& samples
+        );
+        /*!
+            requires
+                - samples.size() > 0
+                - samples == a column matrix or something convertible to a column 
+                  matrix via mat().  Also, x should contain 
+                  matrix_type objects that represent nonempty column vectors.
+                - samples does not contain any infinite or NaN values
+            ensures
+                - #in_vector_size() == samples(0).nr()
+                - #out_vector_size() == samples(0).nr()
+                - This object has learned how to normalize vectors that look like
+                  vectors in the given set of samples.  
+                - #means() == mean(samples)
+                - #std_devs() == reciprocal(sqrt(variance(samples)));
+        !*/
+
+        long in_vector_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows that input vectors are
+                  required to contain if they are to be normalized by
+                  this object.
+        !*/
+
+        long out_vector_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows in the normalized vectors
+                  that come out of this object.
+        !*/
+
+        const matrix_type& means (
+        ) const;
+        /*!
+            ensures               
+                - returns a matrix M such that:
+                    - M.nc() == 1
+                    - M.nr() == in_vector_size()
+                    - M(i) == the mean of the ith input feature shown to train()
+        !*/
+
+        const matrix_type& std_devs (
+        ) const;
+        /*!
+            ensures               
+                - returns a matrix SD such that:
+                    - SD.nc() == 1
+                    - SD.nr() == in_vector_size()
+                    - SD(i) == the reciprocal of the standard deviation of the ith 
+                      input feature shown to train() 
+        !*/
+ 
+        const result_type& operator() (
+            const matrix_type& x
+        ) const;
+        /*!
+            requires
+                - x.nr() == in_vector_size()
+                - x.nc() == 1
+            ensures
+                - returns a normalized version of x, call it Z, that has the 
+                  following properties: 
+                    - Z.nr() == out_vector_size()
+                    - Z.nc() == 1
+                    - the mean of each element of Z is 0 
+                    - the variance of each element of Z is 1
+                    - Z == pointwise_multiply(x-means(), std_devs());
+        !*/
+
+        void swap (
+            vector_normalizer& item
+        );
+        /*!
+            ensures
+                - swaps *this and item
+        !*/
+    };
+
+    template <
+        typename matrix_type
+        >
+    inline void swap (
+        vector_normalizer<matrix_type>& a, 
+        vector_normalizer<matrix_type>& b 
+    ) { a.swap(b); }   
+    /*!
+        provides a global swap function
+    !*/
+
+    template <
+        typename matrix_type,
+        >
+    void deserialize (
+        vector_normalizer<matrix_type>& item, 
+        std::istream& in
+    );   
+    /*!
+        provides deserialization support 
+    !*/
+
+    template <
+        typename matrix_type,
+        >
+    void serialize (
+        const vector_normalizer<matrix_type>& item, 
+        std::ostream& out 
+    );   
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class vector_normalizer_pca
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                - must be a dlib::matrix object capable of representing column 
+                  vectors
+
+            INITIAL VALUE
+                - in_vector_size() == 0
+                - out_vector_size() == 0
+                - means().size() == 0
+                - std_devs().size() == 0
+                - pca_matrix().size() == 0
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents something that can learn to normalize a set 
+                of column vectors.  In particular, normalized column vectors should 
+                have zero mean and a variance of one.  
+
+                Also, this object uses principal component analysis for the purposes 
+                of reducing the number of elements in a vector.  
+
+            THREAD SAFETY
+                Note that this object contains a cached matrix object it uses 
+                to store intermediate results for normalization.  This avoids
+                needing to reallocate it every time this object performs normalization
+                but also makes it non-thread safe.  So make sure you don't share
+                instances of this object between threads. 
+        !*/
+
+    public:
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef matrix<scalar_type,0,1,mem_manager_type> result_type;
+
+        template <typename vector_type>
+        void train (
+            const vector_type& samples,
+            const double eps = 0.99
+        );
+        /*!
+            requires
+                - 0 < eps <= 1
+                - samples.size() > 0
+                - samples == a column matrix or something convertible to a column 
+                  matrix via mat().  Also, x should contain 
+                  matrix_type objects that represent nonempty column vectors.
+                - samples does not contain any infinite or NaN values
+            ensures
+                - This object has learned how to normalize vectors that look like
+                  vectors in the given set of samples.  
+                - Principal component analysis is performed to find a transform 
+                  that might reduce the number of output features. 
+                - #in_vector_size() == samples(0).nr()
+                - 0 < #out_vector_size() <= samples(0).nr()
+                - eps is a number that controls how "lossy" the pca transform will be.
+                  Large values of eps result in #out_vector_size() being larger and
+                  smaller values of eps result in #out_vector_size() being smaller. 
+                - #means() == mean(samples)
+                - #std_devs() == reciprocal(sqrt(variance(samples)));
+                - #pca_matrix() == the PCA transform matrix that is out_vector_size()
+                  rows by in_vector_size() columns.
+        !*/
+
+        long in_vector_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows that input vectors are
+                  required to contain if they are to be normalized by
+                  this object.
+        !*/
+
+        long out_vector_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows in the normalized vectors
+                  that come out of this object.
+        !*/
+
+        const matrix<scalar_type,0,1,mem_manager_type>& means (
+        ) const;
+        /*!
+            ensures               
+                - returns a matrix M such that:
+                    - M.nc() == 1
+                    - M.nr() == in_vector_size()
+                    - M(i) == the mean of the ith input feature shown to train()
+        !*/
+
+        const matrix<scalar_type,0,1,mem_manager_type>& std_devs (
+        ) const;
+        /*!
+            ensures               
+                - returns a matrix SD such that:
+                    - SD.nc() == 1
+                    - SD.nr() == in_vector_size()
+                    - SD(i) == the reciprocal of the standard deviation of the ith 
+                      input feature shown to train() 
+        !*/
+ 
+        const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix (
+        ) const;
+        /*!
+            ensures
+                - returns a matrix PCA such that:
+                    - PCA.nr() == out_vector_size()
+                    - PCA.nc() == in_vector_size()
+                    - PCA == the principal component analysis transformation 
+                      matrix 
+        !*/
+
+        const result_type& operator() (
+            const matrix_type& x
+        ) const;
+        /*!
+            requires
+                - x.nr() == in_vector_size()
+                - x.nc() == 1
+            ensures
+                - returns a normalized version of x, call it Z, that has the 
+                  following properties: 
+                    - Z.nr() == out_vector_size()
+                    - Z.nc() == 1
+                    - the mean of each element of Z is 0 
+                    - the variance of each element of Z is 1
+                    - Z == pca_matrix()*pointwise_multiply(x-means(), std_devs());
+        !*/
+
+        void swap (
+            vector_normalizer_pca& item
+        );
+        /*!
+            ensures
+                - swaps *this and item
+        !*/
+    };
+
+    template <
+        typename matrix_type
+        >
+    inline void swap (
+        vector_normalizer_pca<matrix_type>& a, 
+        vector_normalizer_pca<matrix_type>& b 
+    ) { a.swap(b); }   
+    /*!
+        provides a global swap function
+    !*/
+
+    template <
+        typename matrix_type,
+        >
+    void deserialize (
+        vector_normalizer_pca<matrix_type>& item, 
+        std::istream& in
+    );   
+    /*!
+        provides deserialization support 
+    !*/
+
+    template <
+        typename matrix_type,
+        >
+    void serialize (
+        const vector_normalizer_pca<matrix_type>& item, 
+        std::ostream& out 
+    );   
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_STATISTICs_ABSTRACT_
+
diff --git a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h
new file mode 100644
index 000000000..690370f80
--- /dev/null
+++ b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h
@@ -0,0 +1,618 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
+#define DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
+
+#include "vector_normalizer_frobmetric_abstract.h"
+#include "../matrix.h"
+#include "../optimization.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    struct frobmetric_training_sample 
+    {
+        matrix_type anchor_vect;
+        std::vector<matrix_type> near_vects;
+        std::vector<matrix_type> far_vects;
+
+        unsigned long num_triples (
+        ) const { return near_vects.size() * far_vects.size(); }
+
+        void clear()
+        {
+            near_vects.clear();
+            far_vects.clear();
+        }
+    };
+
+    template <
+        typename matrix_type
+        >
+    void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
+    {
+        int version = 1;
+        serialize(version, out);
+        serialize(item.anchor_vect, out);
+        serialize(item.near_vects, out);
+        serialize(item.far_vects, out);
+    }
+
+    template <
+        typename matrix_type
+        >
+    void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
+    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 1)
+            throw serialization_error("Unexpected version found while deserializing dlib::frobmetric_training_sample.");
+        deserialize(item.anchor_vect, in);
+        deserialize(item.near_vects, in);
+        deserialize(item.far_vects, in);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class vector_normalizer_frobmetric
+    {
+
+    public:
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef matrix_type result_type;
+
+    private:
+        struct compact_frobmetric_training_sample 
+        {
+            std::vector<matrix_type> near_vects;
+            std::vector<matrix_type> far_vects;
+        };
+
+        struct objective
+        {
+            objective (
+                const std::vector<compact_frobmetric_training_sample>& samples_,
+                matrix<double,0,0,mem_manager_type>& Aminus_,
+                const matrix<double,0,1,mem_manager_type>& bias_ 
+            ) : samples(samples_), Aminus(Aminus_), bias(bias_) {}
+            
+            double operator()(const matrix<double,0,1,mem_manager_type>& u) const
+            {
+                long idx = 0;
+                const long dims = samples[0].far_vects[0].size();
+                // Here we compute \hat A from the paper, which we refer to as just A in
+                // the code.  
+                matrix<double,0,0,mem_manager_type> A(dims,dims);
+                A = 0;
+                std::vector<double> ufar, unear;
+                for (unsigned long i = 0; i < samples.size(); ++i)
+                {
+                    ufar.assign(samples[i].far_vects.size(),0);
+                    unear.assign(samples[i].near_vects.size(),0);
+                    for (unsigned long j = 0; j < unear.size(); ++j)
+                    {
+                        for (unsigned long k = 0; k < ufar.size(); ++k)
+                        {
+                            const double val = u(idx++);
+                            ufar[k] -= val;
+                            unear[j] += val;
+                        }
+                    }
+                    for (unsigned long j = 0; j < unear.size(); ++j)
+                        A += unear[j]*samples[i].near_vects[j]*trans(samples[i].near_vects[j]);
+                    for (unsigned long j = 0; j < ufar.size(); ++j)
+                        A += ufar[j]*samples[i].far_vects[j]*trans(samples[i].far_vects[j]);
+                }
+
+                eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(A));
+                Aminus = ed.get_pseudo_v()*diagm(upperbound(ed.get_real_eigenvalues(),0))*trans(ed.get_pseudo_v());
+                // Do this to avoid numeric instability later on since the above
+                // computation can make Aminus slightly non-symmetric.
+                Aminus = make_symmetric(Aminus);
+
+                return dot(u,bias) - 0.5*sum(squared(Aminus));
+            }
+
+        private:
+            const std::vector<compact_frobmetric_training_sample>& samples;
+            matrix<double,0,0,mem_manager_type>& Aminus;
+            const matrix<double,0,1,mem_manager_type>& bias;
+        };
+
+        struct derivative
+        {
+            derivative (
+                unsigned long num_triples_,
+                const std::vector<compact_frobmetric_training_sample>& samples_,
+                matrix<double,0,0,mem_manager_type>& Aminus_,
+                const matrix<double,0,1,mem_manager_type>& bias_ 
+            ) : num_triples(num_triples_), samples(samples_), Aminus(Aminus_), bias(bias_) {}
+            
+            matrix<double,0,1,mem_manager_type> operator()(const matrix<double,0,1,mem_manager_type>& ) const
+            {
+                // Note that Aminus is a function of u (the argument to this function), but
+                // since Aminus will have been computed already by the most recent call to
+                // the objective function we don't need to do anything with u.  We can just
+                // use Aminus right away.
+                matrix<double,0,1,mem_manager_type> grad(num_triples);
+                
+                long idx = 0;
+                std::vector<double> ufar, unear;
+                for (unsigned long i = 0; i < samples.size(); ++i)
+                {
+                    ufar.resize(samples[i].far_vects.size());
+                    unear.resize(samples[i].near_vects.size());
+
+                    for (unsigned long j = 0; j < unear.size(); ++j)
+                        unear[j] = sum(pointwise_multiply(Aminus, samples[i].near_vects[j]*trans(samples[i].near_vects[j])));
+                    for (unsigned long j = 0; j < ufar.size(); ++j)
+                        ufar[j] = sum(pointwise_multiply(Aminus, samples[i].far_vects[j]*trans(samples[i].far_vects[j])));
+
+                    for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
+                    {
+                        for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k)
+                        {
+                            grad(idx) = bias(idx) + ufar[k]-unear[j];
+                            idx++;
+                        }
+                    }
+                }
+
+                return grad;
+            }
+
+        private:
+            const unsigned long num_triples;
+            const std::vector<compact_frobmetric_training_sample>& samples;
+            matrix<double,0,0,mem_manager_type>& Aminus;
+            const matrix<double,0,1,mem_manager_type>& bias;
+        };
+
+
+        class custom_stop_strategy
+        {
+        public:
+            custom_stop_strategy(
+                double C_,
+                double eps_,
+                bool be_verbose_,
+                unsigned long max_iter_
+            ) 
+            {
+                _c = C_;
+
+                _cur_iter = 0;
+                _gradient_thresh = eps_;
+                _max_iter = max_iter_;
+                _verbose = be_verbose_;
+            }
+
+            template <typename T>
+            bool should_continue_search (
+                const T& u,
+                const double ,
+                const T& grad
+            ) 
+            {
+                ++_cur_iter;
+
+                double max_gradient = 0;
+                for (long i = 0; i < grad.size(); ++i)
+                {
+                    const bool at_lower_bound = (0 >= u(i) && grad(i) > 0);
+                    const bool at_upper_bound = (_c/grad.size() <= u(i) && grad(i) < 0);
+                    if (!at_lower_bound && !at_upper_bound)
+                        max_gradient = std::max(std::abs(grad(i)), max_gradient);
+                }
+
+                if (_verbose)
+                {
+                    std::cout << "iteration: " << _cur_iter << "   max_gradient: "<< max_gradient << std::endl;
+                }
+
+                // Only stop when the largest non-bound-constrained element of the gradient
+                // is lower than the threshold.
+                if (max_gradient < _gradient_thresh)
+                    return false;
+
+                // Check if we have hit the max allowable number of iterations.  
+                if (_cur_iter > _max_iter)
+                {
+                    return false;
+                }
+
+                return true;
+            }
+
+        private:
+            bool _verbose;
+
+            unsigned long _max_iter;
+            unsigned long _cur_iter;
+            double _c;
+            double _gradient_thresh;
+        };
+
+    public:
+        vector_normalizer_frobmetric (
+        )
+        {
+            verbose = false;
+            eps = 0.1;
+            C = 1;
+            max_iter = 5000;
+            _use_identity_matrix_prior = false;
+        }
+
+        bool uses_identity_matrix_prior (
+        ) const
+        {
+            return _use_identity_matrix_prior;
+        }
+
+        void set_uses_identity_matrix_prior (
+            bool use_prior
+        )
+        {
+            _use_identity_matrix_prior = use_prior;
+        }
+
+        void be_verbose(
+        )
+        {
+            verbose = true;
+        }
+
+        void set_epsilon (
+            double eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\t void vector_normalizer_frobmetric::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps: " << eps_ 
+                );
+            eps = eps_;
+        }
+
+        double get_epsilon (
+        ) const 
+        {
+            return eps;
+        }
+
+        void set_c (
+            double C_ 
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(C_ > 0,
+                "\t void vector_normalizer_frobmetric::set_c()"
+                << "\n\t C_ must be greater than 0"
+                << "\n\t C_:    " << C_ 
+                << "\n\t this: " << this
+                );
+
+            C = C_;
+        }
+
+        void set_max_iterations (
+            unsigned long max_iterations
+        )
+        {
+            max_iter = max_iterations;
+        }
+
+        unsigned long get_max_iterations (
+        ) const
+        {
+            return max_iter;
+        }
+
+        double get_c (
+        ) const
+        {
+            return C;
+        }
+
+        void be_quiet (
+        )
+        {
+            verbose = false;
+        }
+       
+        void train (
+            const std::vector<frobmetric_training_sample<matrix_type> >& samples
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(samples.size() > 0,
+                "\tvoid vector_normalizer_frobmetric::train()"
+                << "\n\t you have to give a nonempty set of samples to this function"
+                );
+#ifdef ENABLE_ASSERTS
+            {
+                const long dims = samples[0].anchor_vect.size();
+                DLIB_ASSERT(dims != 0,
+                    "\tvoid vector_normalizer_frobmetric::train()"
+                    << "\n\t The dimension of the input vectors can't be zero."
+                    );
+                for (unsigned long i = 0; i < samples.size(); ++i)
+                {
+                    DLIB_ASSERT(is_col_vector(samples[i].anchor_vect), 
+                        "\tvoid vector_normalizer_frobmetric::train()"
+                        << "\n\t Invalid inputs were given to this function."
+                        << "\n\t i: " << i
+                        );
+                    DLIB_ASSERT(samples[i].anchor_vect.size() == dims, 
+                        "\tvoid vector_normalizer_frobmetric::train()"
+                        << "\n\t Invalid inputs were given to this function."
+                        << "\n\t i:    " << i
+                        << "\n\t dims: " << dims
+                        << "\n\t samples[i].anchor_vect.size(): " << samples[i].anchor_vect.size()
+                        );
+
+                    DLIB_ASSERT(samples[i].num_triples() != 0,
+                        "\tvoid vector_normalizer_frobmetric::train()"
+                        << "\n\t It is illegal for a training sample to have no data in it"
+                        << "\n\t i:    " << i
+                    );
+                    for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
+                    {
+                        DLIB_ASSERT(is_col_vector(samples[i].near_vects[j]), 
+                            "\tvoid vector_normalizer_frobmetric::train()"
+                            << "\n\t Invalid inputs were given to this function."
+                            << "\n\t i: " << i
+                            << "\n\t j: " << j
+                            );
+                        DLIB_ASSERT(samples[i].near_vects[j].size() == dims, 
+                            "\tvoid vector_normalizer_frobmetric::train()"
+                            << "\n\t Invalid inputs were given to this function."
+                            << "\n\t i:    " << i
+                            << "\n\t j:    " << j
+                            << "\n\t dims: " << dims
+                            << "\n\t samples[i].near_vects[j].size(): " << samples[i].near_vects[j].size()
+                            );
+                    }
+                    for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
+                    {
+                        DLIB_ASSERT(is_col_vector(samples[i].far_vects[j]), 
+                            "\tvoid vector_normalizer_frobmetric::train()"
+                            << "\n\t Invalid inputs were given to this function."
+                            << "\n\t i: " << i
+                            << "\n\t j: " << j
+                            );
+                        DLIB_ASSERT(samples[i].far_vects[j].size() == dims, 
+                            "\tvoid vector_normalizer_frobmetric::train()"
+                            << "\n\t Invalid inputs were given to this function."
+                            << "\n\t i:    " << i
+                            << "\n\t j:    " << j
+                            << "\n\t dims: " << dims
+                            << "\n\t samples[i].far_vects[j].size(): " << samples[i].far_vects[j].size()
+                            );
+                    }
+                }
+            }
+#endif // end ENABLE_ASSERTS
+
+
+            // compute the mean sample
+            m = 0;
+            for (unsigned long i = 0; i < samples.size(); ++i)
+                m += samples[i].anchor_vect;
+            m /= samples.size();
+
+            DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_frobmetric::train() have infinite or NaN values");
+
+            // Now we need to find tform.  So we setup the optimization problem and run it
+            // over the next few lines of code.
+            unsigned long num_triples = 0;
+            for (unsigned long i = 0; i < samples.size(); ++i)
+                num_triples += samples[i].near_vects.size()*samples[i].far_vects.size();
+
+            matrix<double,0,1,mem_manager_type> u(num_triples);
+            matrix<double,0,1,mem_manager_type> bias(num_triples);
+            u = 0;
+            bias = 1;
+
+
+            // precompute all the anchor_vect to far_vects/near_vects pairs
+            std::vector<compact_frobmetric_training_sample> data(samples.size());
+            unsigned long cnt = 0;
+            std::vector<double> far_norm, near_norm;
+            for (unsigned long i = 0; i < data.size(); ++i)
+            {
+                far_norm.clear();
+                near_norm.clear();
+                data[i].far_vects.reserve(samples[i].far_vects.size());
+                data[i].near_vects.reserve(samples[i].near_vects.size());
+                for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
+                {
+                    data[i].far_vects.push_back(samples[i].anchor_vect - samples[i].far_vects[j]);
+                    if (_use_identity_matrix_prior)
+                        far_norm.push_back(length_squared(data[i].far_vects.back()));
+                }
+                for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
+                {
+                    data[i].near_vects.push_back(samples[i].anchor_vect - samples[i].near_vects[j]);
+                    if (_use_identity_matrix_prior)
+                        near_norm.push_back(length_squared(data[i].near_vects.back()));
+                }
+
+                // Note that this loop only executes if _use_identity_matrix_prior == true.
+                for (unsigned long j = 0; j < near_norm.size(); ++j)
+                {
+                    for (unsigned long k = 0; k < far_norm.size(); ++k)
+                    {
+                        bias(cnt++) = 1 - (far_norm[k] - near_norm[j]);
+                    }
+                }
+            }
+
+            // Now run the main part of the algorithm
+            matrix<double,0,0,mem_manager_type> Aminus;
+            find_max_box_constrained(lbfgs_search_strategy(10),
+                                     custom_stop_strategy(C, eps, verbose, max_iter),
+                                     objective(data, Aminus, bias),
+                                     derivative(num_triples, data, Aminus, bias),
+                                     u, 0, C/num_triples);
+
+
+            // What we need is the optimal Aminus which is a function of u.  So we already
+            // have what we need and just need to put it into tform.
+            eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(-Aminus));
+            matrix<double,0,1,mem_manager_type> eigs = ed.get_real_eigenvalues();
+            // But first, discard the components that are zero to within the machine epsilon.
+            const double tol = max(eigs)*std::numeric_limits<double>::epsilon();
+            for (long i = 0; i < eigs.size(); ++i)
+            {
+                if (eigs(i) < tol)
+                    eigs(i) = 0;
+            }
+            if (_use_identity_matrix_prior)
+                tform = matrix_cast<scalar_type>(identity_matrix(Aminus) + diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
+            else
+                tform = matrix_cast<scalar_type>(diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
+
+            // Pre-apply the transform to m so we don't have to do it inside operator()
+            // every time it's called.
+            m = tform*m;
+        }
+
+        long in_vector_size (
+        ) const
+        {
+            return m.nr();
+        }
+
+        long out_vector_size (
+        ) const
+        {
+            return m.nr();
+        }
+
+        const matrix<scalar_type,0,1,mem_manager_type>& transformed_means (
+        ) const
+        {
+            return m;
+        }
+
+        const matrix<scalar_type,0,0,mem_manager_type>& transform (
+        ) const
+        {
+            return tform;
+        }
+
+        const result_type& operator() (
+            const matrix_type& x
+        ) const
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(in_vector_size() != 0 && in_vector_size() == x.size() && 
+                is_col_vector(x) == true,
+                "\tmatrix vector_normalizer_frobmetric::operator()"
+                << "\n\t you have given invalid arguments to this function"
+                << "\n\t in_vector_size(): " << in_vector_size()
+                << "\n\t x.size():         " << x.size()
+                << "\n\t is_col_vector(x): " << is_col_vector(x)
+                << "\n\t this:             " << this
+                );
+
+            temp_out = tform*x-m;
+            return temp_out;
+        }
+
+        template <typename mt>
+        friend void deserialize (
+            vector_normalizer_frobmetric<mt>& item, 
+            std::istream& in
+        ); 
+
+        template <typename mt>
+        friend void serialize (
+            const vector_normalizer_frobmetric<mt>& item, 
+            std::ostream& out 
+        );
+
+    private:
+
+        // ------------------- private data members -------------------
+
+        matrix_type m;
+        matrix<scalar_type,0,0,mem_manager_type> tform;
+        bool verbose;
+        double eps;
+        double C;
+        unsigned long max_iter;
+        bool _use_identity_matrix_prior;
+
+        // This is just a temporary variable that doesn't contribute to the
+        // state of this object.
+        mutable matrix_type temp_out;
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void serialize (
+        const vector_normalizer_frobmetric<matrix_type>& item, 
+        std::ostream& out 
+    )
+    {
+        const int version = 2;
+        serialize(version, out);
+
+        serialize(item.m, out);
+        serialize(item.tform, out);
+        serialize(item.verbose, out);
+        serialize(item.eps, out);
+        serialize(item.C, out);
+        serialize(item.max_iter, out);
+        serialize(item._use_identity_matrix_prior, out);
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void deserialize (
+        vector_normalizer_frobmetric<matrix_type>& item, 
+        std::istream& in
+    )   
+    {
+        int version = 0;
+        deserialize(version, in);
+        if (version != 1 && version != 2)
+            throw serialization_error("Unsupported version found while deserializing dlib::vector_normalizer_frobmetric.");
+
+        deserialize(item.m, in);
+        deserialize(item.tform, in);
+        deserialize(item.verbose, in);
+        deserialize(item.eps, in);
+        deserialize(item.C, in);
+        deserialize(item.max_iter, in);
+        if (version == 2)
+            deserialize(item._use_identity_matrix_prior, in);
+        else
+            item._use_identity_matrix_prior = false;
+    }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
+
diff --git a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h
new file mode 100644
index 000000000..302628330
--- /dev/null
+++ b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h
@@ -0,0 +1,328 @@
+// Copyright (C) 2013  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
+#ifdef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
+
+#include "../matrix.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    struct frobmetric_training_sample 
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a training data sample for the
+                vector_normalizer_frobmetric object.  It defines a set of training triplets
+                relative to a single anchor_vect vector.  That is, it specifies that the
+                learned distance metric should satisfy num_triples() constraints which are,
+                for all valid i and j:
+                    length(T*anchor_vect-T*near_vects[i]) + 1 < length(T*anchor_vect - T*far_vects[j])
+                for some appropriate linear transformation T which will be learned by
+                vector_normalizer_frobmetric.
+        !*/
+
+        matrix_type anchor_vect;
+        std::vector<matrix_type> near_vects;
+        std::vector<matrix_type> far_vects;
+
+        unsigned long num_triples (
+        ) const { return near_vects.size() * far_vects.size(); }
+        /*!
+            ensures
+                - returns the number of training triplets defined by this object.
+        !*/
+
+        void clear()
+        /*!
+            ensures
+                - #near_vects.size() == 0
+                - #far_vects.size() == 0
+        !*/
+    };
+
+    template < typename matrix_type >
+    void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
+    template < typename matrix_type >
+    void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
+    /*!
+        provides serialisation support.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    class vector_normalizer_frobmetric
+    {
+        /*!
+            REQUIREMENTS ON matrix_type
+                - must be a dlib::matrix object capable of representing column 
+                  vectors
+
+            INITIAL VALUE
+                - in_vector_size() == 0
+                - out_vector_size() == 0
+                - get_epsilon() == 0.1
+                - get_c() == 1
+                - get_max_iterations() == 5000
+                - This object is not verbose
+                - uses_identity_matrix_prior() == false
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for performing the FrobMetric distance metric
+                learning algorithm described in the following paper:
+                    A Scalable Dual Approach to Semidefinite Metric Learning
+                    By Chunhua Shen, Junae Kim, Lei Wang, in CVPR 2011
+
+                Therefore, this object is a tool that takes as input training triplets
+                (anchor_vect, near, far) of vectors and attempts to learn a linear
+                transformation T such that:
+                    length(T*anchor_vect-T*near) + 1 < length(T*anchor_vect - T*far)
+                That is, you give a bunch of anchor_vect vectors and for each anchor_vect
+                you specify some vectors which should be near to it and some that should be
+                far form it.  This object then tries to find a transformation matrix that
+                makes the "near" vectors close to their anchors while the "far" vectors are
+                farther away.
+
+            THREAD SAFETY
+                Note that this object contains a cached matrix object it uses 
+                to store intermediate results for normalization.  This avoids
+                needing to reallocate it every time this object performs normalization
+                but also makes it non-thread safe.  So make sure you don't share
+                instances of this object between threads. 
+        !*/
+
+    public:
+        typedef typename matrix_type::mem_manager_type mem_manager_type;
+        typedef typename matrix_type::type scalar_type;
+        typedef matrix_type result_type;
+
+        vector_normalizer_frobmetric (
+        );
+        /*!
+            ensures
+                - this object is properly initialized
+        !*/
+
+        bool uses_identity_matrix_prior (
+        ) const;
+        /*!
+            ensures
+                - Normally this object will try and find a matrix transform() that
+                  minimizes sum(squared(transform())) but also fits the training data.
+                  However, if #uses_identity_matrix_prior() == true then it will instead
+                  try to find the transformation matrix that minimizes
+                  sum(squared(identity_matrix()-transform())).  That is, it will try to
+                  find the matrix most similar to the identity matrix that best fits the
+                  training data.
+        !*/
+
+        void set_uses_identity_matrix_prior (
+            bool use_prior
+        );
+        /*!
+            ensures
+                - #uses_identity_matrix_prior() == use_prior
+        !*/
+
+        void be_verbose(
+        );
+        /*!
+            ensures
+                - This object will print status messages to standard out so the user can
+                  observe the progress of the train() routine.
+        !*/
+
+        void be_quiet (
+        );
+        /*!
+            ensures
+                - this object will not print anything to standard out.
+        !*/
+
+        void set_epsilon (
+            double eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps 
+        !*/
+
+        double get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the error epsilon that determines when training should stop.
+                  Smaller values may result in a more accurate solution but take longer to
+                  execute.
+        !*/
+
+        void set_c (
+            double C 
+        );
+        /*!
+            requires
+                - C > 0
+            ensures
+                - #set_c() == C
+        !*/
+
+        double get_c (
+        ) const;
+        /*!
+            ensures
+                - returns the regularization parameter.  It is the parameter that
+                  determines the trade-off between trying to fit the training data exactly
+                  or allowing more errors but hopefully improving the generalization of the
+                  resulting distance metric.  Larger values encourage exact fitting while
+                  smaller values of C may encourage better generalization. 
+        !*/
+       
+        void set_max_iterations (
+            unsigned long max_iterations
+        );
+        /*!
+            ensures
+                - #get_max_iterations() == max_iterations
+        !*/
+
+        unsigned long get_max_iterations (
+        ) const;
+        /*!
+            ensures
+                - The train() routine uses an iterative numerical solver to find the best
+                  distance metric.  This function returns the maximum allowable number of
+                  iterations it will use before terminating.  Note that typically the
+                  solver terminates prior to the max iteration count limit due to the error
+                  dropping below get_epsilon().
+        !*/
+
+        void train (
+            const std::vector<frobmetric_training_sample<matrix_type> >& samples
+        );
+        /*!
+            requires
+                - samples.size() != 0
+                - All matrices inside samples (i.e. anchors and elements of near_vects and far_vects)
+                  are column vectors with the same non-zero dimension.
+                - All the vectors in samples contain finite values.
+                - All elements of samples contain data, specifically, for all valid i:
+                    - samples[i].num_triples() != 0
+            ensures
+                - learns a distance metric from the given training samples.  After train
+                  finishes you can use this object's operator() to transform vectors
+                  according to the learned distance metric.  In particular, we will have:
+                    - #transform() == The linear transformation learned by the FrobMetric
+                      learning procedure.
+                    - #in_vector_size() == samples[0].anchor_vect.size()
+                    - You can call (*this)(x) to transform a vector according to the learned 
+                      distance metric.  That is, it should generally be the case that:
+                        - length((*this)(anchor_vect) - (*this)(near)) + 1 < length((*this)(anchor_vect) - (*this)(far))
+                      for the anchor_vect, near, and far vectors in the training data.
+                    - #transformed_means() == the mean of the input anchor_vect vectors
+                      after being transformed by #transform()
+        !*/
+
+        long in_vector_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows that input vectors are required to contain if
+                  they are to be normalized by this object.
+        !*/
+
+        long out_vector_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of rows in the normalized vectors that come out of
+                  this object.
+                - The value returned is always in_vector_size().  So out_vector_size() is
+                  just provided to maintain interface consistency with other vector
+                  normalizer objects.  That is, the transformations applied by this object
+                  do not change the dimensionality of the vectors.
+        !*/
+
+        const matrix<scalar_type,0,1,mem_manager_type>& transformed_means (
+        ) const;
+        /*!
+            ensures
+                - returns a column vector V such that:
+                    - V.size() == in_vector_size()
+                    - V is a vector such that subtracting it from transformed vectors
+                      results in them having an expected value of 0.  Therefore, it is
+                      equal to transform() times the mean of the input anchor_vect vectors
+                      given to train().
+        !*/
+
+        const matrix<scalar_type,0,0,mem_manager_type>& transform (
+        ) const;
+        /*!
+            ensures
+                - returns a copy of the transformation matrix we learned during the last 
+                  call to train().
+                - The returned matrix is square and has in_vector_size() by in_vector_size()
+                  dimensions.
+        !*/
+
+        const result_type& operator() (
+            const matrix_type& x
+        ) const;
+        /*!
+            requires
+                - in_vector_size() != 0
+                - in_vector_size() == x.size()
+                - is_col_vector(x) == true
+            ensures
+                - returns a normalized version of x, call it Z, that has the following
+                  properties: 
+                    - Z == The result of applying the linear transform we learned during
+                      train() to the input vector x.
+                    - Z == transform()*x-transformed_means()
+                    - is_col_vector(Z) == true
+                    - Z.size() == x.size()
+                    - The expected value of each element of Z is 0.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void serialize (
+        const vector_normalizer_frobmetric<matrix_type>& item, 
+        std::ostream& out 
+    );
+    /*!
+        provides serialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type
+        >
+    void deserialize (
+        vector_normalizer_frobmetric<matrix_type>& item, 
+        std::istream& in
+    );
+    /*!
+        provides deserialization support 
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_
+