diff options
Diffstat (limited to 'ml/dlib/dlib/statistics')
20 files changed, 7913 insertions, 0 deletions
diff --git a/ml/dlib/dlib/statistics/average_precision.h b/ml/dlib/dlib/statistics/average_precision.h new file mode 100644 index 000000000..6c2e7e0b1 --- /dev/null +++ b/ml/dlib/dlib/statistics/average_precision.h @@ -0,0 +1,66 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_AVERAGE_PREcISION_Hh_ +#define DLIB_AVERAGE_PREcISION_Hh_ + +#include "average_precision_abstract.h" +#include <vector> + + +namespace dlib +{ + namespace impl + { + inline bool get_bool_part ( + const bool& b + ) { return b; } + + template <typename T> + bool get_bool_part(const std::pair<T,bool>& item) { return item.second; } + } + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename alloc> + double average_precision ( + const std::vector<T,alloc>& items, + unsigned long missing_relevant_items = 0 + ) + { + using namespace dlib::impl; + double relevant_count = 0; + // find the precision values + std::vector<double> precision; + for (unsigned long i = 0; i < items.size(); ++i) + { + if (get_bool_part(items[i])) + { + ++relevant_count; + precision.push_back(relevant_count / (i+1)); + } + } + + double precision_sum = 0; + double max_val = 0; + // now sum over the interpolated precision values + for (std::vector<double>::reverse_iterator i = precision.rbegin(); i != precision.rend(); ++i) + { + max_val = std::max(max_val, *i); + precision_sum += max_val; + } + + + relevant_count += missing_relevant_items; + + if (relevant_count != 0) + return precision_sum/relevant_count; + else + return 1; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_AVERAGE_PREcISION_Hh_ + diff --git a/ml/dlib/dlib/statistics/average_precision_abstract.h b/ml/dlib/dlib/statistics/average_precision_abstract.h new file mode 100644 index 000000000..76c2c702a --- /dev/null +++ b/ml/dlib/dlib/statistics/average_precision_abstract.h @@ -0,0 +1,67 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_ +#ifdef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_ + +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename alloc + > + double average_precision ( + const std::vector<bool,alloc>& items, + unsigned long missing_relevant_items = 0 + ); + /*! + ensures + - Interprets items as a list of relevant and non-relevant items in a response + from an information retrieval system. In particular, items with a true value + are relevant and false items are non-relevant. This function then returns + the average precision of the ranking of the given items. For, example, the + ranking [true, true, true, true, false] would have an average precision of 1. + On the other hand, the ranking of [true false false true] would have an + average precision of 0.75 (because the first true has a precision of 1 and + the second true has a precision of 0.5, giving an average of 0.75). + - As a special case, if item contains no true elements then the average + precision is considered to be 1. + - Note that we use the interpolated precision. That is, the interpolated + precision at a recall value r is set to the maximum precision obtained at any + higher recall value. Or in other words, we interpolate the precision/recall + curve so that precision is monotonically decreasing. Therefore, the average + precision value returned by this function is the area under this interpolated + precision/recall curve. + - This function will add in missing_relevant_items number of items with a + precision of zero into the average value returned. For example, the average + precision of the ranking [true, true] if there are 2 missing relevant items + is 0.5. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double average_precision ( + const std::vector<std::pair<T,bool>,alloc>& items, + unsigned long missing_relevant_items = 0 + ); + /*! + ensures + - this function is equivalent to copying the bool values from items into a + std::vector<bool> and then calling the above average_precision() routine on + it and returning the result. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/cca.h b/ml/dlib/dlib/statistics/cca.h new file mode 100644 index 000000000..21300ea8f --- /dev/null +++ b/ml/dlib/dlib/statistics/cca.h @@ -0,0 +1,186 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CCA_hh_ +#define DLIB_CCA_hh_ + +#include "cca_abstract.h" +#include "../algs.h" +#include "../matrix.h" +#include "../sparse_vector.h" +#include "random_subset_selector.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<typename T::type,0,1> compute_correlations ( + const matrix_exp<T>& L, + const matrix_exp<T>& R + ) + { + DLIB_ASSERT( L.size() > 0 && R.size() > 0 && L.nr() == R.nr(), + "\t matrix compute_correlations()" + << "\n\t Invalid inputs were given to this function." + << "\n\t L.size(): " << L.size() + << "\n\t R.size(): " << R.size() + << "\n\t L.nr(): " << L.nr() + << "\n\t R.nr(): " << R.nr() + ); + + typedef typename T::type type; + matrix<type> A, B, C; + A = diag(trans(R)*L); + B = sqrt(diag(trans(L)*L)); + C = sqrt(diag(trans(R)*R)); + A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C))); + return A; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type, + typename T + > + matrix<T,0,1> impl_cca ( + const matrix_type& L, + const matrix_type& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank, + unsigned long q, + unsigned long num_output_correlations, + double regularization + ) + { + matrix<T> Ul, Vl; + matrix<T> Ur, Vr; + matrix<T> U, V; + matrix<T,0,1> Dr, Dl, D; + + + // Note that we add a few more singular vectors in because it helps improve the + // final results if we run this part with a little higher rank than the final SVD. + svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q); + svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q); + + // Zero out singular values that are essentially zero so they don't cause numerical + // difficulties in the code below. + const double eps = std::numeric_limits<T>::epsilon()*std::max(max(Dr),max(Dl))*100; + Dl = round_zeros(Dl+regularization,eps); + Dr = round_zeros(Dr+regularization,eps); + + // This matrix is really small so we can do a normal full SVD on it. Note that we + // also throw away the columns of Ul and Ur corresponding to zero singular values. + svd3(diagm(Dl>0)*tmp(trans(Ul)*Ur)*diagm(Dr>0), U, D, V); + + // now throw away extra columns of the transformations. We do this in a way + // that keeps the directions that have the highest correlations. + matrix<T,0,1> temp = D; + rsort_columns(U, temp); + rsort_columns(V, D); + U = colm(U, range(0, num_output_correlations-1)); + V = colm(V, range(0, num_output_correlations-1)); + D = rowm(D, range(0, num_output_correlations-1)); + + Ltrans = Vl*inv(diagm(Dl))*U; + Rtrans = Vr*inv(diagm(Dr))*V; + + // Note that the D matrix contains the correlation values for the transformed + // vectors. However, when the L and R matrices have rank higher than + // num_correlations+extra_rank then the values in D become only approximate. + return D; + } + +// ---------------------------------------------------------------------------------------- + + template <typename T> + matrix<T,0,1> cca ( + const matrix<T>& L, + const matrix<T>& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank = 5, + unsigned long q = 2, + double regularization = 0 + ) + { + DLIB_ASSERT( num_correlations > 0 && L.size() > 0 && R.size() > 0 && L.nr() == R.nr() && + regularization >= 0, + "\t matrix cca()" + << "\n\t Invalid inputs were given to this function." + << "\n\t num_correlations: " << num_correlations + << "\n\t regularization: " << regularization + << "\n\t L.size(): " << L.size() + << "\n\t R.size(): " << R.size() + << "\n\t L.nr(): " << L.nr() + << "\n\t R.nr(): " << R.nr() + ); + + using std::min; + const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc()))); + return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n, regularization); + } + +// ---------------------------------------------------------------------------------------- + + template <typename sparse_vector_type, typename T> + matrix<T,0,1> cca ( + const std::vector<sparse_vector_type>& L, + const std::vector<sparse_vector_type>& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank = 5, + unsigned long q = 2, + double regularization = 0 + ) + { + DLIB_ASSERT( num_correlations > 0 && L.size() == R.size() && + max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 && + regularization >= 0, + "\t matrix cca()" + << "\n\t Invalid inputs were given to this function." + << "\n\t num_correlations: " << num_correlations + << "\n\t regularization: " << regularization + << "\n\t L.size(): " << L.size() + << "\n\t R.size(): " << R.size() + << "\n\t max_index_plus_one(L): " << max_index_plus_one(L) + << "\n\t max_index_plus_one(R): " << max_index_plus_one(R) + ); + + using std::min; + const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R)); + const unsigned long num_output_correlations = min(num_correlations, std::min<unsigned long>(R.size(),n)); + return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations, regularization); + } + +// ---------------------------------------------------------------------------------------- + + template <typename sparse_vector_type, typename Rand_type, typename T> + matrix<T,0,1> cca ( + const random_subset_selector<sparse_vector_type,Rand_type>& L, + const random_subset_selector<sparse_vector_type,Rand_type>& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank = 5, + unsigned long q = 2 + ) + { + return cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CCA_hh_ + + diff --git a/ml/dlib/dlib/statistics/cca_abstract.h b/ml/dlib/dlib/statistics/cca_abstract.h new file mode 100644 index 000000000..8e0b4e742 --- /dev/null +++ b/ml/dlib/dlib/statistics/cca_abstract.h @@ -0,0 +1,191 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_CCA_AbSTRACT_Hh_ +#ifdef DLIB_CCA_AbSTRACT_Hh_ + +#include "../matrix/matrix_la_abstract.h" +#include "random_subset_selector_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<typename T::type,0,1> compute_correlations ( + const matrix_exp<T>& L, + const matrix_exp<T>& R + ); + /*! + requires + - L.size() > 0 + - R.size() > 0 + - L.nr() == R.nr() + ensures + - This function treats L and R as sequences of paired row vectors. It + then computes the correlation values between the elements of these + row vectors. In particular, we return a vector COR such that: + - COR.size() == L.nc() + - for all valid i: + - COR(i) == the correlation coefficient between the following sequence + of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr(). + Therefore, COR(i) is a value between -1 and 1 inclusive where 1 + indicates perfect correlation and -1 perfect anti-correlation. Note + that this function assumes the input data vectors have been centered + (i.e. made to have zero mean). If this is not the case then it will + report inaccurate results. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + matrix<T,0,1> cca ( + const matrix<T>& L, + const matrix<T>& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank = 5, + unsigned long q = 2, + double regularization = 0 + ); + /*! + requires + - num_correlations > 0 + - L.size() > 0 + - R.size() > 0 + - L.nr() == R.nr() + - regularization >= 0 + ensures + - This function performs a canonical correlation analysis between the row + vectors in L and R. That is, it finds two transformation matrices, Ltrans + and Rtrans, such that row vectors in the transformed matrices L*Ltrans and + R*Rtrans are as correlated as possible. That is, we try to find two transforms + such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans) + would be maximized. + - Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc()))) + (This is the actual number of elements in the transformed vectors. + Therefore, note that you can't get more outputs than there are rows or + columns in the input matrices.) + - #Ltrans.nr() == L.nc() + - #Ltrans.nc() == N + - #Rtrans.nr() == R.nc() + - #Rtrans.nc() == N + - This function assumes the data vectors in L and R have already been centered + (i.e. we assume the vectors have zero means). However, in many cases it is + fine to use uncentered data with cca(). But if it is important for your + problem then you should center your data before passing it to cca(). + - This function works with reduced rank approximations of the L and R matrices. + This makes it fast when working with large matrices. In particular, we use + the svd_fast() routine to find reduced rank representations of the input + matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) + and similarly for R. This means that you can use the extra_rank and q + arguments to cca() to influence the accuracy of the reduced rank + approximation. However, the default values should work fine for most + problems. + - returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans). The + returned vector should exactly match the output of compute_correlations() + when the reduced rank approximation to L and R is accurate and regularization + is set to 0. However, if this is not the case then the return value of this + function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans). This + deviation can be used to check if the reduced rank approximation is working + or you need to increase extra_rank. + - The dimensions of the output vectors produced by L*#Ltrans or R*#Rtrans are + ordered such that the dimensions with the highest correlations come first. + That is, after applying the transforms produced by cca() to a set of vectors + you will find that dimension 0 has the highest correlation, then dimension 1 + has the next highest, and so on. This also means that the list of numbers + returned from cca() will always be listed in decreasing order. + - This function performs the ridge regression version of Canonical Correlation + Analysis when regularization is set to a value > 0. In particular, larger + values indicate the solution should be more heavily regularized. This can be + useful when the dimensionality of the data is larger than the number of + samples. + - A good discussion of CCA can be found in the paper "Canonical Correlation + Analysis" by David Weenink. In particular, this function is implemented + using equations 29 and 30 from his paper. We also use the idea of doing CCA + on a reduced rank approximation of L and R as suggested by Paramveer S. + Dhillon in his paper "Two Step CCA: A new spectral method for estimating + vector models of words". + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sparse_vector_type, + typename T + > + matrix<T,0,1> cca ( + const std::vector<sparse_vector_type>& L, + const std::vector<sparse_vector_type>& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank = 5, + unsigned long q = 2, + double regularization = 0 + ); + /*! + requires + - num_correlations > 0 + - L.size() == R.size() + - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 + (i.e. L and R can't represent empty matrices) + - L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h + for a definition of sparse vector) + - regularization >= 0 + ensures + - This is just an overload of the cca() function defined above. Except in this + case we take a sparse representation of the input L and R matrices rather than + dense matrices. Therefore, in this case, we interpret L and R as matrices + with L.size() rows, where each row is defined by a sparse vector. So this + function does exactly the same thing as the above cca(). + - Note that you can apply the output transforms to a sparse vector with the + following code: + sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sparse_vector_type, + typename Rand_type, + typename T + > + matrix<T,0,1> cca ( + const random_subset_selector<sparse_vector_type,Rand_type>& L, + const random_subset_selector<sparse_vector_type,Rand_type>& R, + matrix<T>& Ltrans, + matrix<T>& Rtrans, + unsigned long num_correlations, + unsigned long extra_rank = 5, + unsigned long q = 2, + double regularization = 0 + ); + /*! + requires + - num_correlations > 0 + - L.size() == R.size() + - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 + (i.e. L and R can't represent empty matrices) + - L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h + for a definition of sparse vector) + - regularization >= 0 + ensures + - returns cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q) + (i.e. this is just a convenience function for calling the cca() routine when + your sparse vectors are contained inside a random_subset_selector rather than + a std::vector) + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CCA_AbSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/dpca.h b/ml/dlib/dlib/statistics/dpca.h new file mode 100644 index 000000000..cae784682 --- /dev/null +++ b/ml/dlib/dlib/statistics/dpca.h @@ -0,0 +1,541 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_DPCA_h_ +#define DLIB_DPCA_h_ + +#include "dpca_abstract.h" +#include <limits> +#include <cmath> +#include "../algs.h" +#include "../matrix.h" +#include <iostream> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class discriminant_pca + { + /*! + INITIAL VALUE + - vect_size == 0 + - total_count == 0 + - between_count == 0 + - within_count == 0 + - between_weight == 1 + - within_weight == 1 + + CONVENTION + - vect_size == in_vector_size() + - total_count == the number of times add_to_total_variance() has been called. + - within_count == the number of times add_to_within_class_variance() has been called. + - between_count == the number of times add_to_between_class_variance() has been called. + - between_weight == between_class_weight() + - within_weight == within_class_weight() + + - if (total_count != 0) + - total_sum == the sum of all vectors given to add_to_total_variance() + - the covariance of all the elements given to add_to_total_variance() is given + by: + - let avg == total_sum/total_count + - covariance == total_cov/total_count - avg*trans(avg) + - if (within_count != 0) + - within_cov/within_count == the normalized within class scatter matrix + - if (between_count != 0) + - between_cov/between_count == the normalized between class scatter matrix + !*/ + + public: + + struct discriminant_pca_error : public error + { + discriminant_pca_error(const std::string& message): error(message) {} + }; + + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + discriminant_pca ( + ) + { + clear(); + } + + void clear( + ) + { + total_count = 0; + between_count = 0; + within_count = 0; + + vect_size = 0; + + + between_weight = 1; + within_weight = 1; + + + total_sum.set_size(0); + between_cov.set_size(0,0); + total_cov.set_size(0,0); + within_cov.set_size(0,0); + } + + long in_vector_size ( + ) const + { + return vect_size; + } + + void set_within_class_weight ( + scalar_type weight + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(weight >= 0, + "\t void discriminant_pca::set_within_class_weight()" + << "\n\t You can't use negative weight values" + << "\n\t weight: " << weight + << "\n\t this: " << this + ); + + within_weight = weight; + } + + scalar_type within_class_weight ( + ) const + { + return within_weight; + } + + void set_between_class_weight ( + scalar_type weight + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(weight >= 0, + "\t void discriminant_pca::set_between_class_weight()" + << "\n\t You can't use negative weight values" + << "\n\t weight: " << weight + << "\n\t this: " << this + ); + + between_weight = weight; + } + + scalar_type between_class_weight ( + ) const + { + return between_weight; + } + + template <typename EXP1, typename EXP2> + void add_to_within_class_variance( + const matrix_exp<EXP1>& x, + const matrix_exp<EXP2>& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && + x.size() == y.size() && + (in_vector_size() == 0 || x.size() == in_vector_size()), + "\t void discriminant_pca::add_to_within_class_variance()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t is_col_vector(y): " << is_col_vector(y) + << "\n\t x.size(): " << x.size() + << "\n\t y.size(): " << y.size() + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t this: " << this + ); + + vect_size = x.size(); + if (within_count == 0) + { + within_cov = (x-y)*trans(x-y); + } + else + { + within_cov += (x-y)*trans(x-y); + } + ++within_count; + } + + template <typename EXP1, typename EXP2> + void add_to_between_class_variance( + const matrix_exp<EXP1>& x, + const matrix_exp<EXP2>& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && + x.size() == y.size() && + (in_vector_size() == 0 || x.size() == in_vector_size()), + "\t void discriminant_pca::add_to_between_class_variance()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t is_col_vector(y): " << is_col_vector(y) + << "\n\t x.size(): " << x.size() + << "\n\t y.size(): " << y.size() + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t this: " << this + ); + + vect_size = x.size(); + if (between_count == 0) + { + between_cov = (x-y)*trans(x-y); + } + else + { + between_cov += (x-y)*trans(x-y); + } + ++between_count; + } + + template <typename EXP> + void add_to_total_variance( + const matrix_exp<EXP>& x + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()), + "\t void discriminant_pca::add_to_total_variance()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t x.size(): " << x.size() + << "\n\t this: " << this + ); + + vect_size = x.size(); + if (total_count == 0) + { + total_cov = x*trans(x); + total_sum = x; + } + else + { + total_cov += x*trans(x); + total_sum += x; + } + ++total_count; + } + + const general_matrix dpca_matrix ( + const double eps = 0.99 + ) const + { + general_matrix dpca_mat; + general_matrix eigenvalues; + dpca_matrix(dpca_mat, eigenvalues, eps); + return dpca_mat; + } + + const general_matrix dpca_matrix_of_size ( + const long num_rows + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(), + "\t general_matrix discriminant_pca::dpca_matrix_of_size()" + << "\n\t Invalid inputs were given to this function" + << "\n\t num_rows: " << num_rows + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t this: " << this + ); + + general_matrix dpca_mat; + general_matrix eigenvalues; + dpca_matrix_of_size(dpca_mat, eigenvalues, num_rows); + return dpca_mat; + } + + void dpca_matrix ( + general_matrix& dpca_mat, + general_matrix& eigenvalues, + const double eps = 0.99 + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0, + "\t void discriminant_pca::dpca_matrix()" + << "\n\t Invalid inputs were given to this function" + << "\n\t eps: " << eps + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t this: " << this + ); + + compute_dpca_matrix(dpca_mat, eigenvalues, eps, 0); + } + + void dpca_matrix_of_size ( + general_matrix& dpca_mat, + general_matrix& eigenvalues, + const long num_rows + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(), + "\t general_matrix discriminant_pca::dpca_matrix_of_size()" + << "\n\t Invalid inputs were given to this function" + << "\n\t num_rows: " << num_rows + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t this: " << this + ); + + compute_dpca_matrix(dpca_mat, eigenvalues, 1, num_rows); + } + + void swap ( + discriminant_pca& item + ) + { + using std::swap; + swap(total_cov, item.total_cov); + swap(total_sum, item.total_sum); + swap(total_count, item.total_count); + swap(vect_size, item.vect_size); + swap(between_cov, item.between_cov); + + swap(between_count, item.between_count); + swap(between_weight, item.between_weight); + swap(within_cov, item.within_cov); + swap(within_count, item.within_count); + swap(within_weight, item.within_weight); + } + + friend void deserialize ( + discriminant_pca& item, + std::istream& in + ) + { + deserialize( item.total_cov, in); + deserialize( item.total_sum, in); + deserialize( item.total_count, in); + deserialize( item.vect_size, in); + deserialize( item.between_cov, in); + deserialize( item.between_count, in); + deserialize( item.between_weight, in); + deserialize( item.within_cov, in); + deserialize( item.within_count, in); + deserialize( item.within_weight, in); + } + + friend void serialize ( + const discriminant_pca& item, + std::ostream& out + ) + { + serialize( item.total_cov, out); + serialize( item.total_sum, out); + serialize( item.total_count, out); + serialize( item.vect_size, out); + serialize( item.between_cov, out); + serialize( item.between_count, out); + serialize( item.between_weight, out); + serialize( item.within_cov, out); + serialize( item.within_count, out); + serialize( item.within_weight, out); + } + + discriminant_pca operator+ ( + const discriminant_pca& item + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) && + between_class_weight() == item.between_class_weight() && + within_class_weight() == item.within_class_weight(), + "\t discriminant_pca discriminant_pca::operator+()" + << "\n\t The two discriminant_pca objects being added must have compatible parameters" + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t item.in_vector_size(): " << item.in_vector_size() + << "\n\t between_class_weight(): " << between_class_weight() + << "\n\t item.between_class_weight(): " << item.between_class_weight() + << "\n\t within_class_weight(): " << within_class_weight() + << "\n\t item.within_class_weight(): " << item.within_class_weight() + << "\n\t this: " << this + ); + + discriminant_pca temp(item); + + // We need to make sure to ignore empty matrices. That's what these if statements + // are for. + + if (total_count != 0 && temp.total_count != 0) + { + temp.total_cov += total_cov; + temp.total_sum += total_sum; + temp.total_count += total_count; + } + else if (total_count != 0) + { + temp.total_cov = total_cov; + temp.total_sum = total_sum; + temp.total_count = total_count; + } + + if (between_count != 0 && temp.between_count != 0) + { + temp.between_cov += between_cov; + temp.between_count += between_count; + } + else if (between_count != 0) + { + temp.between_cov = between_cov; + temp.between_count = between_count; + } + + if (within_count != 0 && temp.within_count != 0) + { + temp.within_cov += within_cov; + temp.within_count += within_count; + } + else if (within_count != 0) + { + temp.within_cov = within_cov; + temp.within_count = within_count; + } + + return temp; + } + + discriminant_pca& operator+= ( + const discriminant_pca& rhs + ) + { + (*this + rhs).swap(*this); + return *this; + } + + private: + + void compute_dpca_matrix ( + general_matrix& dpca_mat, + general_matrix& eigenvalues, + const double eps, + long num_rows + ) const + { + general_matrix cov; + + // now combine the three measures of variance into a single matrix by using the + // within_weight and between_weight weights. + cov = get_total_covariance_matrix(); + if (within_count != 0) + cov -= within_weight*within_cov/within_count; + if (between_count != 0) + cov += between_weight*between_cov/between_count; + + + eigenvalue_decomposition<general_matrix> eig(make_symmetric(cov)); + + eigenvalues = eig.get_real_eigenvalues(); + dpca_mat = eig.get_pseudo_v(); + + // sort the eigenvalues and eigenvectors so that the biggest eigenvalues come first + rsort_columns(dpca_mat, eigenvalues); + + long num_vectors = 0; + if (num_rows == 0) + { + // Some of the eigenvalues might be negative. So first lets zero those out + // so they won't get considered. + eigenvalues = pointwise_multiply(eigenvalues > 0, eigenvalues); + // figure out how many eigenvectors we want in our dpca matrix + const double thresh = sum(eigenvalues)*eps; + double total = 0; + for (long r = 0; r < eigenvalues.size() && total < thresh; ++r) + { + // Don't even think about looking at eigenvalues that are 0. If we go this + // far then we have all we need. + if (eigenvalues(r) == 0) + break; + + ++num_vectors; + total += eigenvalues(r); + } + + if (num_vectors == 0) + throw discriminant_pca_error("While performing discriminant_pca, all eigenvalues were negative or 0"); + } + else + { + num_vectors = num_rows; + } + + + // So now we know we want to use num_vectors of the first eigenvectors. So + // pull those out and discard the rest. + dpca_mat = trans(colm(dpca_mat,range(0,num_vectors-1))); + + // also clip off the eigenvalues we aren't using + eigenvalues = rowm(eigenvalues, range(0,num_vectors-1)); + + } + + general_matrix get_total_covariance_matrix ( + ) const + /*! + ensures + - returns the covariance matrix of all the data given to the add_to_total_variance() + !*/ + { + // if we don't even know the dimensionality of the vectors we are dealing + // with then just return an empty matrix + if (vect_size == 0) + return general_matrix(); + + // we know the vector size but we have zero total covariance. + if (total_count == 0) + { + general_matrix temp(vect_size,vect_size); + temp = 0; + return temp; + } + + // In this case we actually have something to make a total covariance matrix out of. + // So do that. + column_matrix avg = total_sum/total_count; + + return total_cov/total_count - avg*trans(avg); + } + + general_matrix total_cov; + column_matrix total_sum; + scalar_type total_count; + + long vect_size; + + general_matrix between_cov; + scalar_type between_count; + scalar_type between_weight; + + general_matrix within_cov; + scalar_type within_count; + scalar_type within_weight; + }; + + template < + typename matrix_type + > + inline void swap ( + discriminant_pca<matrix_type>& a, + discriminant_pca<matrix_type>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_DPCA_h_ + + diff --git a/ml/dlib/dlib/statistics/dpca_abstract.h b/ml/dlib/dlib/statistics/dpca_abstract.h new file mode 100644 index 000000000..d9eef635b --- /dev/null +++ b/ml/dlib/dlib/statistics/dpca_abstract.h @@ -0,0 +1,365 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_DPCA_ABSTRaCT_ +#ifdef DLIB_DPCA_ABSTRaCT_ + +#include <limits> +#include <cmath> +#include "../matrix/matrix_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class discriminant_pca + { + /*! + REQUIREMENTS ON matrix_type + Must be some type of dlib::matrix. + + INITIAL VALUE + - in_vector_size() == 0 + - between_class_weight() == 1 + - within_class_weight() == 1 + + WHAT THIS OBJECT REPRESENTS + This object implements the Discriminant PCA technique described in the paper: + A New Discriminant Principal Component Analysis Method with Partial Supervision (2009) + by Dan Sun and Daoqiang Zhang + + This algorithm is basically a straightforward generalization of the classical PCA + technique to handle partially labeled data. It is useful if you want to learn a linear + dimensionality reduction rule using a bunch of data that is partially labeled. + + It functions by estimating three different scatter matrices. The first is the total scatter + matrix St (i.e. the total data covariance matrix), the second is the between class scatter + matrix Sb (basically a measure of the variance between data of different classes) and the + third is the within class scatter matrix Sw (a measure of the variance of data within the + same classes). + + Once these three matrices are estimated they are combined according to the following equation: + S = St + a*Sb - b*Sw + Where a and b are user supplied weights. Then the largest eigenvalues of the S matrix are + computed and their associated eigenvectors are returned as the output of this algorithm. + That is, the desired linear dimensionality reduction is given by the matrix with these + eigenvectors stored in its rows. + + Note that if a and b are set to 0 (or no labeled data is provided) then the output transformation + matrix is the same as the one produced by the classical PCA algorithm. + !*/ + + public: + + struct discriminant_pca_error : public error; + /*! + This exception is thrown if there is some error that prevents us from creating + a DPCA matrix. + !*/ + + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + discriminant_pca ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void clear( + ); + /*! + ensures + - #*this has its initial value + !*/ + + long in_vector_size ( + ) const; + /*! + ensures + - if (this object has been presented with any input vectors) then + - returns the dimension of the column vectors used with this object + - else + - returns 0 + !*/ + + void set_within_class_weight ( + scalar_type weight + ); + /*! + requires + - weight >= 0 + ensures + - #within_class_weight() == weight + !*/ + + scalar_type within_class_weight ( + ) const; + /*! + ensures + - returns the weight used when combining the within class scatter matrix with + the other scatter matrices. + !*/ + + void set_between_class_weight ( + scalar_type weight + ); + /*! + requires + - weight >= 0 + ensures + - #between_class_weight() == weight + !*/ + + scalar_type between_class_weight ( + ) const; + /*! + ensures + - returns the weight used when combining the between class scatter matrix with + the other scatter matrices. + !*/ + + void add_to_within_class_variance( + const matrix_exp& x, + const matrix_exp& y + ); + /*! + requires + - is_col_vector(x) == true + - is_col_vector(y) == true + - x.size() == y.size() + - if (in_vector_size() != 0) then + - x.size() == y.size() == in_vector_size() + ensures + - #in_vector_size() == x.size() + - Adds (x-y)*trans(x-y) to the within class scatter matrix. + (i.e. the direction given by (x-y) is recorded as being a direction associated + with within class variance and is therefore unimportant and will be weighted + less in the final dimensionality reduction) + !*/ + + void add_to_between_class_variance( + const matrix_exp& x, + const matrix_exp& y + ); + /*! + requires + - is_col_vector(x) == true + - is_col_vector(y) == true + - x.size() == y.size() + - if (in_vector_size() != 0) then + - x.size() == y.size() == in_vector_size() + ensures + - #in_vector_size() == x.size() + - Adds (x-y)*trans(x-y) to the between class scatter matrix. + (i.e. the direction given by (x-y) is recorded as being a direction associated + with between class variance and is therefore important and will be weighted + higher in the final dimensionality reduction) + !*/ + + void add_to_total_variance( + const matrix_exp& x + ); + /*! + requires + - is_col_vector(x) == true + - if (in_vector_size() != 0) then + - x.size() == in_vector_size() + ensures + - #in_vector_size() == x.size() + - let M denote the centroid (or mean) of all the data. Then this function + Adds (x-M)*trans(x-M) to the total scatter matrix. + (i.e. the direction given by (x-M) is recorded as being a direction associated + with unlabeled variance and is therefore of default importance and will be weighted + as described in the discriminant_pca class description.) + !*/ + + const general_matrix dpca_matrix ( + const double eps = 0.99 + ) const; + /*! + requires + - 0 < eps <= 1 + - in_vector_size() != 0 + (i.e. you have to have given this object some data) + ensures + - computes and returns the matrix MAT given by dpca_matrix(MAT,eigen,eps). + That is, this function returns the dpca_matrix computed by the function + defined below. + - Note that MAT is the desired linear transformation matrix. That is, + multiplying a vector by MAT performs the desired linear dimensionality reduction. + throws + - discriminant_pca_error + This exception is thrown if we are unable to create the dpca_matrix for some + reason. For example, if only within class examples have been given or + within_class_weight() is very large then all eigenvalues will be negative and + that prevents this algorithm from working properly. + !*/ + + void dpca_matrix ( + general_matrix& dpca_mat, + general_matrix& eigenvalues, + const double eps = 0.99 + ) const; + /*! + requires + - 0 < eps <= 1 + - in_vector_size() != 0 + (i.e. you have to have given this object some data) + ensures + - is_col_vector(#eigenvalues) == true + - #dpca_mat.nr() == eigenvalues.size() + - #dpca_mat.nc() == in_vector_size() + - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described + in the class description and its eigenvalue is given by eigenvalues(i). + - all values in #eigenvalues are > 0. Moreover, the eigenvalues are in + sorted order with the largest eigenvalue stored at eigenvalues(0). + - (#dpca_mat)*trans(#dpca_mat) == identity_matrix. + (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually + orthogonal) + - Note that #dpca_mat is the desired linear transformation matrix. That is, + multiplying a vector by #dpca_mat performs the desired linear dimensionality + reduction. + - sum(#eigenvalues) will be equal to about eps times the total sum of all + positive eigenvalues in the S matrix described in this class's description. + This means that eps is a number that controls how "lossy" the dimensionality + reduction will be. Large values of eps result in more output dimensions + while smaller values result in fewer. + throws + - discriminant_pca_error + This exception is thrown if we are unable to create the dpca_matrix for some + reason. For example, if only within class examples have been given or + within_class_weight() is very large then all eigenvalues will be negative and + that prevents this algorithm from working properly. + !*/ + + const general_matrix dpca_matrix_of_size ( + const long num_rows + ); + /*! + requires + - 0 < num_rows <= in_vector_size() + ensures + - computes and returns the matrix MAT given by dpca_matrix_of_size(MAT,eigen,num_rows). + That is, this function returns the dpca_matrix computed by the function + defined below. + - Note that MAT is the desired linear transformation matrix. That is, + multiplying a vector by MAT performs the desired linear dimensionality + reduction to num_rows dimensions. + !*/ + + void dpca_matrix_of_size ( + general_matrix& dpca_mat, + general_matrix& eigenvalues, + const long num_rows + ); + /*! + requires + - 0 < num_rows <= in_vector_size() + ensures + - is_col_vector(#eigenvalues) == true + - #dpca_mat.nr() == eigenvalues.size() + - #dpca_mat.nr() == num_rows + - #dpca_mat.nc() == in_vector_size() + - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described + in the class description and its eigenvalue is given by eigenvalues(i). + - The values in #eigenvalues might be positive or negative. Additionally, the + eigenvalues are in sorted order with the largest eigenvalue stored at + eigenvalues(0). + - (#dpca_mat)*trans(#dpca_mat) == identity_matrix. + (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually + orthogonal) + - Note that #dpca_mat is the desired linear transformation matrix. That is, + multiplying a vector by #dpca_mat performs the desired linear dimensionality + reduction to num_rows dimensions. + !*/ + + discriminant_pca operator+ ( + const discriminant_pca& item + ) const; + /*! + requires + - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size() + (i.e. the in_vector_size() of *this and item must match or one must be zero) + - between_class_weight() == item.between_class_weight() + - within_class_weight() == item.within_class_weight() + ensures + - returns a new discriminant_pca object that represents the combination of all + the measurements given to *this and item. That is, this function returns a + discriminant_pca object, R, that is equivalent to what you would obtain if all + modifying calls (e.g. the add_to_*() functions) to *this and item had instead + been done to R. + !*/ + + discriminant_pca& operator+= ( + const discriminant_pca& rhs + ); + /*! + requires + - in_vector_size() == 0 || rhs.in_vector_size() == 0 || in_vector_size() == rhs.in_vector_size() + (i.e. the in_vector_size() of *this and rhs must match or one must be zero) + - between_class_weight() == rhs.between_class_weight() + - within_class_weight() == rhs.within_class_weight() + ensures + - #*this == *item + rhs + - returns #*this + !*/ + + void swap ( + discriminant_pca& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + inline void swap ( + discriminant_pca<matrix_type>& a, + discriminant_pca<matrix_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename matrix_type, + > + void deserialize ( + discriminant_pca<matrix_type>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + + template < + typename matrix_type, + > + void serialize ( + const discriminant_pca<matrix_type>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_DPCA_ABSTRaCT_ + diff --git a/ml/dlib/dlib/statistics/image_feature_sampling.h b/ml/dlib/dlib/statistics/image_feature_sampling.h new file mode 100644 index 000000000..f04f9926e --- /dev/null +++ b/ml/dlib/dlib/statistics/image_feature_sampling.h @@ -0,0 +1,82 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_IMAGE_FEATURE_SaMPLING_Hh_ +#define DLIB_IMAGE_FEATURE_SaMPLING_Hh_ + +#include "image_feature_sampling_abstract.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename feature_extractor_type, + typename pyramid_type + > + random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features ( + const image_array_type& images, + const pyramid_type& pyr, + const feature_extractor_type& fe_, + unsigned long num + ) + { + feature_extractor_type fe; + fe.copy_configuration(fe_); + random_subset_selector<typename feature_extractor_type::descriptor_type> basis; + basis.set_max_size(num); + + typedef typename image_array_type::type image_type; + image_type temp_img, temp_img2; + + for (unsigned long i = 0; i < images.size(); ++i) + { + bool at_pyramid_top = true; + while (true) + { + if (at_pyramid_top) + fe.load(images[i]); + else + fe.load(temp_img); + + if (fe.size() == 0) + break; + + for (long r = 0; r < fe.nr(); ++r) + { + for (long c = 0; c < fe.nc(); ++c) + { + if (basis.next_add_accepts()) + { + basis.add(fe(r,c)); + } + else + { + basis.add(); + } + } + } + + if (at_pyramid_top) + { + at_pyramid_top = false; + pyr(images[i], temp_img); + } + else + { + pyr(temp_img, temp_img2); + swap(temp_img2,temp_img); + } + } + } + return basis; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_IMAGE_FEATURE_SaMPLING_Hh_ + diff --git a/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h b/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h new file mode 100644 index 000000000..b51ef5423 --- /dev/null +++ b/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h @@ -0,0 +1,45 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_ +#ifdef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_ + +#include "random_subset_selector_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_array_type, + typename feature_extractor_type, + typename pyramid_type + > + random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features ( + const image_array_type& images, + const pyramid_type& pyr, + const feature_extractor_type& fe, + unsigned long num + ); + /*! + requires + - pyramid_type == a type compatible with the image pyramid objects defined + in dlib/image_transforms/image_pyramid_abstract.h + - feature_extractor_type == a local image feature extractor type such as the + dlib::hog_image + - image_array_type == an implementation of dlib/array/array_kernel_abstract.h + and it must contain image objects which can be passed to pyr() and fe.load() + and are swappable by global swap(). + ensures + - creates an image pyramid for each image in images and performs feature + extraction on each pyramid level. Then selects a random subsample of at + most num local feature vectors and returns it. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/lda.h b/ml/dlib/dlib/statistics/lda.h new file mode 100644 index 000000000..38de3fd1e --- /dev/null +++ b/ml/dlib/dlib/statistics/lda.h @@ -0,0 +1,237 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_LDA_Hh_ +#define DLIB_LDA_Hh_ + +#include "lda_abstract.h" +#include "../algs.h" +#include <map> +#include "../matrix.h" +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + + inline std::map<unsigned long,unsigned long> make_class_labels( + const std::vector<unsigned long>& row_labels + ) + { + std::map<unsigned long,unsigned long> class_labels; + for (unsigned long i = 0; i < row_labels.size(); ++i) + { + const unsigned long next = class_labels.size(); + if (class_labels.count(row_labels[i]) == 0) + class_labels[row_labels[i]] = next; + } + return class_labels; + } + + // ------------------------------------------------------------------------------------ + + template < + typename T + > + matrix<T,0,1> center_matrix ( + matrix<T>& X + ) + { + matrix<T,1> mean; + for (long r = 0; r < X.nr(); ++r) + mean += rowm(X,r); + mean /= X.nr(); + + for (long r = 0; r < X.nr(); ++r) + set_rowm(X,r) -= mean; + + return trans(mean); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void compute_lda_transform ( + matrix<T>& X, + matrix<T,0,1>& mean, + const std::vector<unsigned long>& row_labels, + unsigned long lda_dims = 500, + unsigned long extra_pca_dims = 200 + ) + { + std::map<unsigned long,unsigned long> class_labels = impl::make_class_labels(row_labels); + // LDA can only give out at most class_labels.size()-1 dimensions so don't try to + // compute more than that. + lda_dims = std::min<unsigned long>(lda_dims, class_labels.size()-1); + + // make sure requires clause is not broken + DLIB_CASSERT(class_labels.size() > 1, + "\t void compute_lda_transform()" + << "\n\t You can't call this function if the number of distinct class labels is less than 2." + ); + DLIB_CASSERT(X.size() != 0 && (long)row_labels.size() == X.nr() && lda_dims != 0, + "\t void compute_lda_transform()" + << "\n\t Invalid inputs were given to this function." + << "\n\t X.size(): " << X.size() + << "\n\t row_labels.size(): " << row_labels.size() + << "\n\t lda_dims: " << lda_dims + ); + + + mean = impl::center_matrix(X); + // Do PCA to reduce dims + matrix<T> pu,pw,pv; + svd_fast(X, pu, pw, pv, lda_dims+extra_pca_dims, 4); + pu.set_size(0,0); // free RAM, we don't need pu. + X = X*pv; + + + matrix<T> class_means(class_labels.size(), X.nc()); + class_means = 0; + matrix<T,0,1> class_counts(class_labels.size()); + class_counts = 0; + + // First compute the means of each class + for (unsigned long i = 0; i < row_labels.size(); ++i) + { + const unsigned long class_idx = class_labels[row_labels[i]]; + set_rowm(class_means,class_idx) += rowm(X,i); + class_counts(class_idx)++; + } + class_means = inv(diagm(class_counts))*class_means; + // subtract means from the data + for (unsigned long i = 0; i < row_labels.size(); ++i) + { + const unsigned long class_idx = class_labels[row_labels[i]]; + set_rowm(X,i) -= rowm(class_means,class_idx); + } + + // Note that we are using the formulas from the paper Using Discriminant + // Eigenfeatures for Image Retrieval by Swets and Weng. + matrix<T> Sw = trans(X)*X; + matrix<T> Sb = trans(class_means)*class_means; + matrix<T> A, H; + matrix<T,0,1> W; + svd3(Sw, A, W, H); + W = sqrt(W); + W = reciprocal(lowerbound(W,max(W)*1e-5)); + A = trans(H*diagm(W))*Sb*H*diagm(W); + matrix<T> v,s,u; + svd3(A, v, s, u); + matrix<T> tform = H*diagm(W)*u; + // pick out only the number of dimensions we are supposed to for the output, unless + // we should just keep them all, then don't do anything. + if ((long)lda_dims <= tform.nc()) + { + rsort_columns(tform, s); + tform = colm(tform, range(0, lda_dims-1)); + } + + X = trans(pv*tform); + mean = X*mean; + } + +// ---------------------------------------------------------------------------------------- + + inline std::pair<double,double> equal_error_rate ( + const std::vector<double>& low_vals, + const std::vector<double>& high_vals + ) + { + std::vector<std::pair<double,int> > temp; + temp.reserve(low_vals.size()+high_vals.size()); + for (unsigned long i = 0; i < low_vals.size(); ++i) + temp.push_back(std::make_pair(low_vals[i], -1)); + for (unsigned long i = 0; i < high_vals.size(); ++i) + temp.push_back(std::make_pair(high_vals[i], +1)); + + std::sort(temp.begin(), temp.end()); + + if (temp.size() == 0) + return std::make_pair(0,0); + + double thresh = temp[0].first; + + unsigned long num_low_wrong = low_vals.size(); + unsigned long num_high_wrong = 0; + double low_error = num_low_wrong/(double)low_vals.size(); + double high_error = num_high_wrong/(double)high_vals.size(); + for (unsigned long i = 0; i < temp.size() && high_error < low_error; ++i) + { + thresh = temp[i].first; + if (temp[i].second > 0) + { + num_high_wrong++; + high_error = num_high_wrong/(double)high_vals.size(); + } + else + { + num_low_wrong--; + low_error = num_low_wrong/(double)low_vals.size(); + } + } + + return std::make_pair((low_error+high_error)/2, thresh); + } + +// ---------------------------------------------------------------------------------------- + + struct roc_point + { + double true_positive_rate; + double false_positive_rate; + double detection_threshold; + }; + + inline std::vector<roc_point> compute_roc_curve ( + const std::vector<double>& true_detections, + const std::vector<double>& false_detections + ) + { + DLIB_CASSERT(true_detections.size() != 0); + DLIB_CASSERT(false_detections.size() != 0); + + std::vector<std::pair<double,int> > temp; + temp.reserve(true_detections.size()+false_detections.size()); + for (unsigned long i = 0; i < true_detections.size(); ++i) + temp.push_back(std::make_pair(true_detections[i], +1)); + for (unsigned long i = 0; i < false_detections.size(); ++i) + temp.push_back(std::make_pair(false_detections[i], -1)); + + std::sort(temp.rbegin(), temp.rend()); + + + std::vector<roc_point> roc_curve; + roc_curve.reserve(temp.size()); + + double num_false_included = 0; + double num_true_included = 0; + for (unsigned long i = 0; i < temp.size(); ++i) + { + if (temp[i].second > 0) + num_true_included++; + else + num_false_included++; + + roc_point p; + p.true_positive_rate = num_true_included/true_detections.size(); + p.false_positive_rate = num_false_included/false_detections.size(); + p.detection_threshold = temp[i].first; + roc_curve.push_back(p); + } + + return roc_curve; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LDA_Hh_ + diff --git a/ml/dlib/dlib/statistics/lda_abstract.h b/ml/dlib/dlib/statistics/lda_abstract.h new file mode 100644 index 000000000..ab9fd7a32 --- /dev/null +++ b/ml/dlib/dlib/statistics/lda_abstract.h @@ -0,0 +1,118 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_LDA_ABSTRACT_Hh_ +#ifdef DLIB_LDA_ABSTRACT_Hh_ + +#include <map> +#include "../matrix.h" +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void compute_lda_transform ( + matrix<T>& X, + matrix<T,0,1>& M, + const std::vector<unsigned long>& row_labels, + unsigned long lda_dims = 500, + unsigned long extra_pca_dims = 200 + ); + /*! + requires + - X.size() != 0 + - row_labels.size() == X.nr() + - The number of distinct values in row_labels > 1 + - lda_dims != 0 + ensures + - We interpret X as a collection X.nr() of input vectors, where each row of X + is one of the vectors. + - We interpret row_labels[i] as the label of the vector rowm(X,i). + - This function performs the dimensionality reducing version of linear + discriminant analysis. That is, you give it a set of labeled vectors and it + returns a linear transform that maps the input vectors into a new space that + is good for distinguishing between the different classes. In particular, + this function finds matrices Z and M such that: + - Given an input vector x, Z*x-M, is the transformed version of x. That is, + Z*x-M maps x into a space where x vectors that share the same class label + are near each other. + - Z*x-M results in the transformed vectors having zero expected mean. + - Z.nr() <= lda_dims + (it might be less than lda_dims if there are not enough distinct class + labels to support lda_dims dimensions). + - Z.nc() == X.nc() + - We overwrite the input matrix X and store Z in it. Therefore, the + outputs of this function are in X and M. + - In order to deal with very high dimensional inputs, we perform PCA internally + to map the input vectors into a space of at most lda_dims+extra_pca_dims + prior to performing LDA. + !*/ + +// ---------------------------------------------------------------------------------------- + + std::pair<double,double> equal_error_rate ( + const std::vector<double>& low_vals, + const std::vector<double>& high_vals + ); + /*! + ensures + - This function finds a threshold T that best separates the elements of + low_vals from high_vals by selecting the threshold with equal error rate. In + particular, we try to pick a threshold T such that: + - for all valid i: + - high_vals[i] >= T + - for all valid i: + - low_vals[i] < T + Where the best T is determined such that the fraction of low_vals >= T is the + same as the fraction of high_vals < T. + - Let ERR == the equal error rate. I.e. the fraction of times low_vals >= T + and high_vals < T. Note that 0 <= ERR <= 1. + - returns make_pair(ERR,T) + !*/ + +// ---------------------------------------------------------------------------------------- + + struct roc_point + { + double true_positive_rate; + double false_positive_rate; + double detection_threshold; + }; + + std::vector<roc_point> compute_roc_curve ( + const std::vector<double>& true_detections, + const std::vector<double>& false_detections + ); + /*! + requires + - true_detections.size() != 0 + - false_detections.size() != 0 + ensures + - This function computes the ROC curve (receiver operating characteristic) + curve of the given data. Therefore, we interpret true_detections as + containing detection scores for a bunch of true detections and + false_detections as detection scores from a bunch of false detections. A + perfect detector would always give higher scores to true detections than to + false detections, resulting in a true positive rate of 1 and a false positive + rate of 0, for some appropriate detection threshold. + - Returns an array, ROC, such that: + - ROC.size() == true_detections.size()+false_detections.size() + - for all valid i: + - If you were to accept all detections with a score >= ROC[i].detection_threshold + then you would obtain a true positive rate of ROC[i].true_positive_rate and a + false positive rate of ROC[i].false_positive_rate. + - ROC is ordered such that low detection rates come first. That is, the + curve is swept from a high detection threshold to a low threshold. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LDA_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/random_subset_selector.h b/ml/dlib/dlib/statistics/random_subset_selector.h new file mode 100644 index 000000000..17492363d --- /dev/null +++ b/ml/dlib/dlib/statistics/random_subset_selector.h @@ -0,0 +1,372 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RANDOM_SUBSeT_SELECTOR_H_ +#define DLIB_RANDOM_SUBSeT_SELECTOR_H_ + +#include "random_subset_selector_abstract.h" +#include "../rand.h" +#include <vector> +#include "../algs.h" +#include "../string.h" +#include "../serialize.h" +#include "../matrix/matrix_mat.h" +#include <iostream> + +namespace dlib +{ + template < + typename T, + typename Rand_type = dlib::rand + > + class random_subset_selector + { + /*! + INITIAL VALUE + - _max_size == 0 + - items.size() == 0 + - count == 0 + - _next_add_accepts == false + + CONVENTION + - count == the number of times add() has been called since the last + time this object was empty. + - items.size() == size() + - max_size() == _max_size + - next_add_accepts() == _next_add_accepts + !*/ + public: + typedef T type; + typedef T value_type; + typedef default_memory_manager mem_manager_type; + typedef Rand_type rand_type; + + typedef typename std::vector<T>::iterator iterator; + typedef typename std::vector<T>::const_iterator const_iterator; + + + random_subset_selector ( + ) + { + _max_size = 0; + make_empty(); + } + + void set_seed(const std::string& value) + { + rnd.set_seed(value); + } + + void make_empty ( + ) + { + items.resize(0); + count = 0; + update_next_add_accepts(); + } + + const std::vector<T>& to_std_vector( + ) const { return items; } + + size_t size ( + ) const + { + return items.size(); + } + + void set_max_size ( + unsigned long new_max_size + ) + { + items.reserve(new_max_size); + make_empty(); + _max_size = new_max_size; + update_next_add_accepts(); + } + + unsigned long max_size ( + ) const + { + return _max_size; + } + + T& operator[] ( + unsigned long idx + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(idx < size(), + "\tvoid random_subset_selector::operator[]()" + << "\n\t idx is out of range" + << "\n\t idx: " << idx + << "\n\t size(): " << size() + << "\n\t this: " << this + ); + + return items[idx]; + } + + const T& operator[] ( + unsigned long idx + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(idx < size(), + "\tvoid random_subset_selector::operator[]()" + << "\n\t idx is out of range" + << "\n\t idx: " << idx + << "\n\t size(): " << size() + << "\n\t this: " << this + ); + + return items[idx]; + } + + iterator begin() { return items.begin(); } + const_iterator begin() const { return items.begin(); } + iterator end() { return items.end(); } + const_iterator end() const { return items.end(); } + + bool next_add_accepts ( + ) const + { + return _next_add_accepts; + } + + void add ( + const T& new_item + ) + { + if (items.size() < _max_size) + { + items.push_back(new_item); + // swap into a random place + exchange(items[rnd.get_random_32bit_number()%items.size()], items.back()); + } + else if (_next_add_accepts) + { + // pick a random element of items and replace it. + items[rnd.get_random_32bit_number()%items.size()] = new_item; + } + + update_next_add_accepts(); + ++count; + } + + void add ( + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(next_add_accepts() == false, + "\tvoid random_subset_selector::add()" + << "\n\t You should be calling the version of add() that takes an argument" + << "\n\t this: " << this + ); + + update_next_add_accepts(); + ++count; + } + + void swap ( + random_subset_selector& a + ) + { + items.swap(a.items); + std::swap(_max_size, a._max_size); + std::swap(count, a.count); + rnd.swap(a.rnd); + std::swap(_next_add_accepts, a._next_add_accepts); + } + + template <typename T1, typename T2> + friend void serialize ( + const random_subset_selector<T1,T2>& item, + std::ostream& out + ); + + template <typename T1, typename T2> + friend void deserialize ( + random_subset_selector<T1,T2>& item, + std::istream& in + ); + + private: + + void update_next_add_accepts ( + ) + { + if (items.size() < _max_size) + { + _next_add_accepts = true; + } + else if (_max_size == 0) + { + _next_add_accepts = false; + } + else + { + // At this point each element of items has had an equal chance of being in this object. + // In particular, the probability that each arrived here is currently items.size()/count. + // We need to be able to say that, after this function ends, the probability of any + // particular object ending up in items is items.size()/(count+1). So this means that + // we should decide to add a new item into items with this probability. Also, if we do + // so then we pick one of the current items and replace it at random with the new item. + + // Make me a random 64 bit number. This might seem excessive but I want this object + // to be able to handle an effectively infinite number of calls to add(). So count + // might get very large and we need to deal with that properly. + const unsigned long num1 = rnd.get_random_32bit_number(); + const unsigned long num2 = rnd.get_random_32bit_number(); + uint64 num = num1; + num <<= 32; + num |= num2; + + num %= (count+1); + + _next_add_accepts = (num < items.size()); + } + + } + + std::vector<T> items; + unsigned long _max_size; + uint64 count; + + rand_type rnd; + + bool _next_add_accepts; + + }; + + template < + typename T, + typename rand_type + > + void swap ( + random_subset_selector<T,rand_type>& a, + random_subset_selector<T,rand_type>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + template <typename T1, typename T2> + void serialize ( + const random_subset_selector<T1,T2>& item, + std::ostream& out + ) + { + serialize(item.items, out); + serialize(item._max_size, out); + serialize(item.count, out); + serialize(item.rnd, out); + serialize(item._next_add_accepts, out); + } + + template <typename T1, typename T2> + void deserialize ( + random_subset_selector<T1,T2>& item, + std::istream& in + ) + { + deserialize(item.items, in); + deserialize(item._max_size, in); + deserialize(item.count, in); + deserialize(item.rnd, in); + deserialize(item._next_add_accepts, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + random_subset_selector<T> randomly_subsample ( + const std::vector<T,alloc>& samples, + unsigned long num + ) + { + random_subset_selector<T> subset; + subset.set_max_size(num); + for (unsigned long i = 0; i < samples.size(); ++i) + subset.add(samples[i]); + return subset; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc, + typename U + > + random_subset_selector<T> randomly_subsample ( + const std::vector<T,alloc>& samples, + unsigned long num, + const U& random_seed + ) + { + random_subset_selector<T> subset; + subset.set_seed(cast_to_string(random_seed)); + subset.set_max_size(num); + for (unsigned long i = 0; i < samples.size(); ++i) + subset.add(samples[i]); + return subset; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + random_subset_selector<T> randomly_subsample ( + const random_subset_selector<T>& samples, + unsigned long num + ) + { + random_subset_selector<T> subset; + subset.set_max_size(num); + for (unsigned long i = 0; i < samples.size(); ++i) + subset.add(samples[i]); + return subset; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + random_subset_selector<T> randomly_subsample ( + const random_subset_selector<T>& samples, + unsigned long num, + const U& random_seed + ) + { + random_subset_selector<T> subset; + subset.set_seed(cast_to_string(random_seed)); + subset.set_max_size(num); + for (unsigned long i = 0; i < samples.size(); ++i) + subset.add(samples[i]); + return subset; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + const matrix_op<op_array_to_mat<random_subset_selector<T> > > mat ( + const random_subset_selector<T>& m + ) + { + typedef op_array_to_mat<random_subset_selector<T> > op; + return matrix_op<op>(op(m)); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANDOM_SUBSeT_SELECTOR_H_ + + diff --git a/ml/dlib/dlib/statistics/random_subset_selector_abstract.h b/ml/dlib/dlib/statistics/random_subset_selector_abstract.h new file mode 100644 index 000000000..96f8b545d --- /dev/null +++ b/ml/dlib/dlib/statistics/random_subset_selector_abstract.h @@ -0,0 +1,388 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_ +#ifdef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_ + +#include <vector> +#include "../rand/rand_kernel_abstract.h" +#include "../algs.h" +#include "../string.h" + +namespace dlib +{ + template < + typename T, + typename Rand_type = dlib::rand + > + class random_subset_selector + { + /*! + REQUIREMENTS ON T + T must be a copyable type + + REQUIREMENTS ON Rand_type + must be an implementation of dlib/rand/rand_kernel_abstract.h + + INITIAL VALUE + - size() == 0 + - max_size() == 0 + - next_add_accepts() == false + + WHAT THIS OBJECT REPRESENTS + This object is a tool to help you select a random subset of a large body of data. + In particular, it is useful when the body of data is too large to fit into memory. + + So for example, suppose you have 1000000 data samples and you want to select a + random subset of size 1000. Then you could do that as follows: + + random_subset_selector<sample_type> rand_subset; + rand_subset.set_max_size(1000) + for (int i = 0; i < 1000000; ++i) + rand_subset.add( get_next_data_sample()); + + + At the end of the for loop you will have your random subset of 1000 samples. And by + random I mean that each of the 1000000 data samples has an equal chance of ending + up in the rand_subset object. + + + Note that the above example calls get_next_data_sample() for each data sample. This + may be inefficient since most of the data samples are just ignored. An alternative + method that doesn't require you to load each sample can also be used. Consider the + following: + + random_subset_selector<sample_type> rand_subset; + rand_subset.set_max_size(1000) + for (int i = 0; i < 1000000; ++i) + if (rand_subset.next_add_accepts()) + rand_subset.add(get_data_sample(i)); + else + rand_subset.add() + + In the above example we only actually fetch the data sample into memory if we + know that the rand_subset would include it into the random subset. Otherwise, + we can just call the empty add(). + + + Finally, note that the random_subset_selector uses a deterministic pseudo-random + number generator under the hood. Moreover, the default constructor always seeds + the random number generator in the same way. So unless you call set_seed() + each instance of the random_subset_selector will function identically. + !*/ + public: + typedef T type; + typedef T value_type; + typedef default_memory_manager mem_manager_type; + typedef Rand_type rand_type; + + typedef typename std::vector<T>::iterator iterator; + typedef typename std::vector<T>::const_iterator const_iterator; + + random_subset_selector ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void set_seed( + const std::string& value + ); + /*! + ensures + - sets the seed of the random number generator that is embedded in + this object to the given value. + !*/ + + void make_empty ( + ); + /*! + ensures + - #size() == 0 + !*/ + + size_t size ( + ) const; + /*! + ensures + - returns the number of items of type T currently contained in this object + !*/ + + void set_max_size ( + unsigned long new_max_size + ); + /*! + ensures + - #max_size() == new_max_size + - #size() == 0 + !*/ + + unsigned long max_size ( + ) const; + /*! + ensures + - returns the maximum allowable size for this object + !*/ + + T& operator[] ( + unsigned long idx + ); + /*! + requires + - idx < size() + ensures + - returns a non-const reference to the idx'th element of this object + !*/ + + const T& operator[] ( + unsigned long idx + ) const; + /*! + requires + - idx < size() + ensures + - returns a const reference to the idx'th element of this object + !*/ + + bool next_add_accepts ( + ) const; + /*! + ensures + - if (the next call to add(item) will result in item being included + into *this) then + - returns true + - Note that the next item will always be accepted if size() < max_size(). + - else + - returns false + - Note that the next item will never be accepted if max_size() == 0. + !*/ + + void add ( + const T& new_item + ); + /*! + ensures + - if (next_add_accepts()) then + - places new_item into *this object at a random location + - if (size() < max_size()) then + - #size() == size() + 1 + - #next_add_accepts() == The updated information about the acceptance + of the next call to add() + !*/ + + void add ( + ); + /*! + requires + - next_add_accepts() == false + ensures + - This function does nothing but update the value of #next_add_accepts() + !*/ + + iterator begin( + ); + /*! + ensures + - if (size() > 0) then + - returns an iterator referring to the first element in + this container. + - else + - returns end() + !*/ + + const_iterator begin( + ) const; + /*! + ensures + - if (size() > 0) then + - returns a const_iterator referring to the first element in + this container. + - else + - returns end() + !*/ + + iterator end( + ); + /*! + ensures + - returns an iterator that represents one past the end of + this container + !*/ + + const_iterator end( + ) const; + /*! + ensures + - returns an iterator that represents one past the end of + this container + !*/ + + const std::vector<T>& to_std_vector( + ) const; + /*! + ensures + - returns a const reference to the underlying std::vector<T> that contains + all elements in this object. That is, this function returns a vector, V, + which has the following properties: + - V.size() == this->size() + - V.begin() == this->begin() + - V.end() == this->end() + !*/ + + void swap ( + random_subset_selector& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + + template < + typename T, + typename rand_type + > + void swap ( + random_subset_selector<T,rand_type>& a, + random_subset_selector<T,rand_type>& b + ) { a.swap(b); } + /*! + provides global swap support + !*/ + + template < + typename T, + typename rand_type + > + void serialize ( + const random_subset_selector<T,rand_type>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + template < + typename T, + typename rand_type + > + void deserialize ( + random_subset_selector<T,rand_type>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + random_subset_selector<T> randomly_subsample ( + const std::vector<T,alloc>& samples, + unsigned long num + ); + /*! + ensures + - returns a random subset R such that: + - R contains a random subset of the given samples + - R.size() == min(num, samples.size()) + - R.max_size() == num + - The random number generator used by this function will always be + initialized in the same way. I.e. this function will always pick + the same random subsample if called multiple times. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc, + typename U + > + random_subset_selector<T> randomly_subsample ( + const std::vector<T,alloc>& samples, + unsigned long num, + const U& random_seed + ); + /*! + requires + - random_seed must be convertible to a string by dlib::cast_to_string() + ensures + - returns a random subset R such that: + - R contains a random subset of the given samples + - R.size() == min(num, samples.size()) + - R.max_size() == num + - The given random_seed will be used to initialize the random number + generator used by this function. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + random_subset_selector<T> randomly_subsample ( + const random_subset_selector<T>& samples, + unsigned long num + ); + /*! + ensures + - returns a random subset R such that: + - R contains a random subset of the given samples + - R.size() == min(num, samples.size()) + - R.max_size() == num + - The random number generator used by this function will always be + initialized in the same way. I.e. this function will always pick + the same random subsample if called multiple times. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + random_subset_selector<T> randomly_subsample ( + const random_subset_selector<T>& samples, + unsigned long num, + const U& random_seed + ); + /*! + requires + - random_seed must be convertible to a string by dlib::cast_to_string() + ensures + - returns a random subset R such that: + - R contains a random subset of the given samples + - R.size() == min(num, samples.size()) + - R.max_size() == num + - The given random_seed will be used to initialize the random number + generator used by this function. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + const matrix_exp mat ( + const random_subset_selector<T>& m + ); + /*! + ensures + - returns a matrix R such that: + - is_col_vector(R) == true + - R.size() == m.size() + - for all valid r: + R(r) == m[r] + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_ + diff --git a/ml/dlib/dlib/statistics/running_gradient.h b/ml/dlib/dlib/statistics/running_gradient.h new file mode 100644 index 000000000..d3f1b3ddf --- /dev/null +++ b/ml/dlib/dlib/statistics/running_gradient.h @@ -0,0 +1,370 @@ +// Copyright (C) 2016 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RuNNING_GRADIENT_Hh_ +#define DLIB_RuNNING_GRADIENT_Hh_ + +#include "running_gradient_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include <cmath> +#include "../matrix.h" +#include <algorithm> + + +namespace dlib +{ + class running_gradient + { + public: + + running_gradient ( + ) + { + clear(); + } + + void clear( + ) + { + n = 0; + R = identity_matrix<double>(2)*1e6; + w = 0; + residual_squared = 0; + } + + double current_n ( + ) const + { + return n; + } + + void add( + double y + ) + { + matrix<double,2,1> x; + x = n, 1; + + // Do recursive least squares computations + const double temp = 1 + trans(x)*R*x; + matrix<double,2,1> tmp = R*x; + R = R - (tmp*trans(tmp))/temp; + // R should always be symmetric. This line improves numeric stability of this algorithm. + R = 0.5*(R + trans(R)); + w = w + R*x*(y - trans(x)*w); + + // Also, recursively keep track of the residual error between the given value + // and what our linear predictor outputs. + residual_squared = residual_squared + std::pow((y - trans(x)*w),2.0)*temp; + + ++n; + } + + double gradient ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\t double running_gradient::gradient()" + << "\n\t You must add more values into this object before calling this function." + << "\n\t this: " << this + ); + + return w(0); + } + + double intercept ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 0, + "\t double running_gradient::intercept()" + << "\n\t You must add more values into this object before calling this function." + << "\n\t this: " << this + ); + + return w(1); + } + double standard_error ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 2, + "\t double running_gradient::standard_error()" + << "\n\t You must add more values into this object before calling this function." + << "\n\t this: " << this + ); + + + const double s = residual_squared/(n-2); + const double adjust = 12.0/(std::pow(current_n(),3.0) - current_n()); + return std::sqrt(s*adjust); + } + + double probability_gradient_less_than ( + double thresh + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 2, + "\t double running_gradient::probability_gradient_less_than()" + << "\n\t You must add more values into this object before calling this function." + << "\n\t this: " << this + ); + + return normal_cdf(thresh, gradient(), standard_error()); + } + + double probability_gradient_greater_than ( + double thresh + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 2, + "\t double running_gradient::probability_gradient_greater_than()" + << "\n\t You must add more values into this object before calling this function." + << "\n\t this: " << this + ); + + return 1-probability_gradient_less_than(thresh); + } + + friend void serialize (const running_gradient& item, std::ostream& out) + { + int version = 1; + serialize(version, out); + serialize(item.n, out); + serialize(item.R, out); + serialize(item.w, out); + serialize(item.residual_squared, out); + } + + friend void deserialize (running_gradient& item, std::istream& in) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw serialization_error("Unexpected version found while deserializing dlib::running_gradient."); + deserialize(item.n, in); + deserialize(item.R, in); + deserialize(item.w, in); + deserialize(item.residual_squared, in); + } + + private: + + static double normal_cdf(double value, double mean, double stddev) + { + if (stddev == 0) + { + if (value < mean) + return 0; + else if (value > mean) + return 1; + else + return 0.5; + } + value = (value-mean)/stddev; + return 0.5 * std::erfc(-value / std::sqrt(2.0)); + } + + double n; + matrix<double,2,2> R; + matrix<double,2,1> w; + double residual_squared; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + double probability_gradient_less_than ( + const T& container, + double thresh + ) + { + running_gradient g; + for(auto&& v : container) + g.add(v); + + // make sure requires clause is not broken + DLIB_ASSERT(g.current_n() > 2, + "\t double probability_gradient_less_than()" + << "\n\t You need more than 2 elements in the given container to call this function." + ); + return g.probability_gradient_less_than(thresh); + } + + template < + typename T + > + double probability_gradient_greater_than ( + const T& container, + double thresh + ) + { + running_gradient g; + for(auto&& v : container) + g.add(v); + + // make sure requires clause is not broken + DLIB_ASSERT(g.current_n() > 2, + "\t double probability_gradient_greater_than()" + << "\n\t You need more than 2 elements in the given container to call this function." + ); + return g.probability_gradient_greater_than(thresh); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + double find_upper_quantile ( + const T& container_, + double quantile + ) + { + DLIB_CASSERT(0 <= quantile && quantile <= 1.0); + + // copy container into a std::vector + std::vector<double> container(container_.begin(), container_.end()); + + DLIB_CASSERT(container.size() > 0); + + size_t idx_upper = std::round((container.size()-1)*(1-quantile)); + + std::nth_element(container.begin(), container.begin()+idx_upper, container.end()); + auto upper_q = *(container.begin()+idx_upper); + return upper_q; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + size_t count_steps_without_decrease ( + const T& container, + double probability_of_decrease = 0.51 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1, + "\t size_t count_steps_without_decrease()" + << "\n\t probability_of_decrease: "<< probability_of_decrease + ); + + running_gradient g; + size_t count = 0; + size_t j = 0; + for (auto i = container.rbegin(); i != container.rend(); ++i) + { + ++j; + g.add(*i); + if (g.current_n() > 2) + { + // Note that this only looks backwards because we are looping over the + // container backwards. So here we are really checking if the gradient isn't + // decreasing. + double prob_decreasing = g.probability_gradient_greater_than(0); + // If we aren't confident things are decreasing. + if (prob_decreasing < probability_of_decrease) + count = j; + } + } + return count; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + size_t count_steps_without_decrease_robust ( + const T& container, + double probability_of_decrease = 0.51, + double quantile_discard = 0.10 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 <= quantile_discard && quantile_discard <= 1); + DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1, + "\t size_t count_steps_without_decrease_robust()" + << "\n\t probability_of_decrease: "<< probability_of_decrease + ); + + if (container.size() == 0) + return 0; + + const auto quantile_thresh = find_upper_quantile(container, quantile_discard); + + running_gradient g; + size_t count = 0; + size_t j = 0; + for (auto i = container.rbegin(); i != container.rend(); ++i) + { + ++j; + // ignore values that are too large + if (*i <= quantile_thresh) + g.add(*i); + + if (g.current_n() > 2) + { + // Note that this only looks backwards because we are looping over the + // container backwards. So here we are really checking if the gradient isn't + // decreasing. + double prob_decreasing = g.probability_gradient_greater_than(0); + // If we aren't confident things are decreasing. + if (prob_decreasing < probability_of_decrease) + count = j; + } + } + return count; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + size_t count_steps_without_increase ( + const T& container, + double probability_of_increase = 0.51 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0.5 < probability_of_increase && probability_of_increase < 1, + "\t size_t count_steps_without_increase()" + << "\n\t probability_of_increase: "<< probability_of_increase + ); + + running_gradient g; + size_t count = 0; + size_t j = 0; + for (auto i = container.rbegin(); i != container.rend(); ++i) + { + ++j; + g.add(*i); + if (g.current_n() > 2) + { + // Note that this only looks backwards because we are looping over the + // container backwards. So here we are really checking if the gradient isn't + // increasing. + double prob_increasing = g.probability_gradient_less_than(0); + // If we aren't confident things are increasing. + if (prob_increasing < probability_of_increase) + count = j; + } + } + return count; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RuNNING_GRADIENT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/running_gradient_abstract.h b/ml/dlib/dlib/statistics/running_gradient_abstract.h new file mode 100644 index 000000000..a42e1c152 --- /dev/null +++ b/ml/dlib/dlib/statistics/running_gradient_abstract.h @@ -0,0 +1,276 @@ +// Copyright (C) 2016 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_ +#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_ + + +namespace dlib +{ + class running_gradient + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool for estimating if a noisy sequence of numbers is + trending up or down and by how much. It does this by finding the least + squares fit of a line to the data and then allows you to perform a + statistical test on the slope of that line. + !*/ + + public: + + running_gradient ( + ); + /*! + ensures + - #current_n() == 0 + !*/ + + void clear( + ); + /*! + ensures + - #current_n() == 0 + - this object has its initial value + - clears all memory of any previous data points + !*/ + + double current_n ( + ) const; + /*! + ensures + - returns the number of values given to this object by add(). + !*/ + + void add( + double y + ); + /*! + ensures + - Updates the gradient() and standard_error() estimates in this object + based on the new y value. + - #current_n() == current_n() + 1 + !*/ + + double gradient ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - If we consider the values given to add() as time series data, we can + estimate the rate-of-change of those values. That is, how much, + typically, do those values change from sample to sample? The gradient() + function returns the current estimate. It does this by finding the least + squares fit of a line to the data given to add() and returning the slope + of this line. + !*/ + + double intercept ( + ) const; + /*! + requires + - current_n() > 0 + ensures + - This class fits a line to the time series data given to add(). This + function returns the intercept of that line while gradient() returns the + slope of that line. This means that, for example, the next point that + add() will see, as predicted by this best fit line, is the value + intercept() + current_n()*gradient(). + !*/ + + double standard_error ( + ) const; + /*! + requires + - current_n() > 2 + ensures + - returns the standard deviation of the estimate of gradient(). + !*/ + + double probability_gradient_less_than ( + double thresh + ) const; + /*! + requires + - current_n() > 2 + ensures + - If we can assume the values given to add() are linearly related to each + other and corrupted by Gaussian additive noise then our estimate of + gradient() is a random variable with a mean value of gradient() and a + standard deviation of standard_error(). This lets us compute the + probability that the true gradient of the data is less than thresh, which + is what this function returns. + !*/ + + double probability_gradient_greater_than ( + double thresh + ) const; + /*! + requires + - current_n() > 2 + ensures + - returns 1-probability_gradient_less_than(thresh) + !*/ + + }; + + void serialize ( + const running_gradient& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + void deserialize ( + running_gradient& item, + std::istream& in + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + double probability_gradient_less_than ( + const T& container, + double thresh + ); + /*! + requires + - container must be a container of double values that can be enumerated with a + range based for loop. + - The container must contain more than 2 elements. + ensures + - Puts all the elements of container into a running_gradient object, R, and + then returns R.probability_gradient_less_than(thresh). + !*/ + + template < + typename T + > + double probability_gradient_greater_than ( + const T& container, + double thresh + ); + /*! + requires + - container must be a container of double values that can be enumerated with a + range based for loop. + - The container must contain more than 2 elements. + ensures + - Puts all the elements of container into a running_gradient object, R, and + then returns R.probability_gradient_greater_than(thresh). + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + size_t count_steps_without_decrease ( + const T& container, + double probability_of_decrease = 0.51 + ); + /*! + requires + - container must be a container of double values that can be enumerated with + .rbegin() and .rend(). + - 0.5 < probability_of_decrease < 1 + ensures + - If you think of the contents of container as a potentially noisy time series, + then this function returns a count of how long the time series has gone + without noticeably decreasing in value. It does this by adding the + elements into a running_gradient object and counting how many elements, + starting with container.back(), that you need to examine before you are + confident that the series has been decreasing in value. Here, "confident of + decrease" means that the probability of decrease is >= probability_of_decrease. + - Setting probability_of_decrease to 0.51 means we count until we see even a + small hint of decrease, whereas a larger value of 0.99 would return a larger + count since it keeps going until it is nearly certain the time series is + decreasing. + - The max possible output from this function is container.size(). + !*/ + + template < + typename T + > + size_t count_steps_without_decrease_robust ( + const T& container, + double probability_of_decrease = 0.51, + double quantile_discard = 0.10 + ); + /*! + requires + - container must be a container of double values that can be enumerated with + .begin() and .end() as well as .rbegin() and .rend(). + - 0.5 < probability_of_decrease < 1 + - 0 <= quantile_discard <= 1 + ensures + - This function behaves just like + count_steps_without_decrease(container,probability_of_decrease) except that + it ignores values in container that are in the upper quantile_discard + quantile. So for example, if the quantile discard is 0.1 then the 10% + largest values in container are ignored. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + size_t count_steps_without_increase ( + const T& container, + double probability_of_increase = 0.51 + ); + /*! + requires + - container must be a container of double values that can be enumerated with + .rbegin() and .rend(). + - 0.5 < probability_of_increase < 1 + ensures + - If you think of the contents of container as a potentially noisy time series, + then this function returns a count of how long the time series has gone + without noticeably increasing in value. It does this by adding the + elements into a running_gradient object and counting how many elements, + starting with container.back(), that you need to examine before you are + confident that the series has been increasing in value. Here, "confident of + increase" means that the probability of increase is >= probability_of_increase. + - Setting probability_of_increase to 0.51 means we count until we see even a + small hint of increase, whereas a larger value of 0.99 would return a larger + count since it keeps going until it is nearly certain the time series is + increasing. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + double find_upper_quantile ( + const T& container, + double quantile + ); + /*! + requires + - container must be a container of double values that can be enumerated with + .begin() and .end(). + - 0 <= quantile <= 1 + - container.size() > 0 + ensures + - Finds and returns the value such that quantile percent of the values in + container are greater than it. For example, 0.5 would find the median value + in container while 0.1 would find the value that lower bounded the 10% + largest values in container. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/sammon.h b/ml/dlib/dlib/statistics/sammon.h new file mode 100644 index 000000000..1a3eb72a1 --- /dev/null +++ b/ml/dlib/dlib/statistics/sammon.h @@ -0,0 +1,269 @@ +// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SAMMoN_Hh_ +#define DLIB_SAMMoN_Hh_ + +#include "sammon_abstract.h" +#include "../matrix.h" +#include "../algs.h" +#include "dpca.h" +#include <vector> + +namespace dlib +{ + + class sammon_projection + { + + public: + + // ------------------------------------------------------------------------------------ + + template <typename matrix_type> + std::vector<matrix<double,0,1> > operator() ( + const std::vector<matrix_type>& data, + const long num_dims + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(num_dims > 0, + "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" + << "\n\t Invalid inputs were given to this function." + << "\n\t num_dims: " << num_dims + ); + std::vector<matrix<double,0,1> > result; // projections + if (data.size() == 0) + { + return result; + } + +#ifdef ENABLE_ASSERTS + DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(), + "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" + << "\n\t Invalid inputs were given to this function." + << "\n\t data.size(): " << data.size() + << "\n\t num_dims: " << num_dims + << "\n\t data[0].size(): " << data[0].size() + ); + for (unsigned long i = 0; i < data.size(); ++i) + { + DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(), + "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" + << "\n\t Invalid inputs were given to this function." + << "\n\t data["<<i<<"].size(): " << data[i].size() + << "\n\t data[0].size(): " << data[0].size() + << "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i]) + ); + } +#endif + + double err; // error (discarded) + do_sammon_projection(data, num_dims, result, err); + return result; + } + + // ------------------------------------------------------------------------------------ + + template <typename matrix_type> + void operator() ( + const std::vector<matrix_type>& data, + const long num_dims, + std::vector<matrix<double,0,1> >& result, + double &err, + const unsigned long num_iters = 1000, + const double err_delta = 1.0e-9 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(num_dims > 0 && num_iters > 0 && err_delta > 0.0, + "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" + << "\n\t Invalid inputs were given to this function." + << "\n\t data.size(): " << data.size() + << "\n\t num_dims: " << num_dims + << "\n\t num_iters: " << num_iters + << "\n\t err_delta: " << err_delta + ); + if (data.size() == 0) + { + result.clear(); + err = 0; + return; + } + +#ifdef ENABLE_ASSERTS + DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(), + "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" + << "\n\t Invalid inputs were given to this function." + << "\n\t data.size(): " << data.size() + << "\n\t num_dims: " << num_dims + << "\n\t data[0].size(): " << data[0].size() + ); + for (unsigned long i = 0; i < data.size(); ++i) + { + DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(), + "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" + << "\n\t Invalid inputs were given to this function." + << "\n\t data["<<i<<"].size(): " << data[i].size() + << "\n\t data[0].size(): " << data[0].size() + << "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i]) + ); + } +#endif + + do_sammon_projection(data, num_dims, result, err, num_iters, err_delta); + } + + // ---------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------- + + private: + + void compute_relative_distances( + matrix<double,0,1>& dist, // relative distances (output) + matrix<double,0,0>& data, // input data (matrix whose columns are the input vectors) + double eps_ratio = 1.0e-7 // to compute the minimum distance eps + ) + /*! + requires + - dist.nc() == comb( data.nc(), 2 ), preallocated + - eps_ratio > 0 + ensures + - dist[k] == lenght(data[i] - data[j]) for k = j(j-1)/2 + i + !*/ + { + const long N = data.nc(); // num of points + double eps; // minimum distance, forced to avoid vectors collision + // computed at runtime as eps_ration * mean(vectors distances) + for (int k = 0, i = 1; i < N; ++i) + for (int j = 0; j < i; ++j) + dist(k++) = length(colm(data, i) - colm(data, j)); + + eps = eps_ratio * mean(dist); + dist = lowerbound(dist, eps); + } + + // ---------------------------------------------------------------------------------------- + + template <typename matrix_type> + void do_sammon_projection( + const std::vector<matrix_type>& data, // input data + unsigned long num_dims, // dimension of the reduced space + std::vector<matrix<double,0,1> >& result, // projections (output) + double &err, // error (output) + unsigned long num_iters = 1000, // max num of iterations: stop condition + const double err_delta = 1.0e-9 // delta error: stop condition + ) + /*! + requires + - matrix_type should be a kind of dlib::matrix<double,N,1> + - num_dims > 0 + - num_iters > 0 + - err_delta > 0 + ensures + - result == a set of matrix<double,num_dims,1> objects that represent + the Sammon's projections of data vectors. + - err == the estimated error done in the projection, with the extra + property that err(at previous iteration) - err < err_delta + !*/ + { + // other params + const double mf = 0.3; // magic factor + + matrix<double> mdata; // input data as matrix + matrix<double> projs; // projected vectors, i.e. output data as matrix + + // std::vector<matrix> -> matrix + mdata.set_size(data[0].size(), data.size()); + for (unsigned int i = 0; i < data.size(); i++) + set_colm(mdata, i) = data[i]; + + const long N = mdata.nc(); // num of points + const long d = num_dims; // size of the reduced space + const long nd = N * (N - 1) / 2; // num of pairs of points = size of the distances vectors + + matrix<double, 0, 1> dsij, inv_dsij; // d*_ij: pair-wise distances in the input space (and inverses) + dsij.set_size(nd, 1); + inv_dsij.set_size(nd, 1); + double ic; // 1.0 / sum of dsij + + matrix<double, 0, 1> dij; // d_ij: pair-wise distances in the reduced space + dij.set_size(nd, 1); + + matrix<double, 0, 0> dE, dE2, dtemp; // matrices representing error partial derivatives + dE.set_size(d, N); + dE2.set_size(d, N); + dtemp.set_size(d, N); + + matrix<double, 0, 1> inv_dij, alpha; // utility vectors used to compute the partial derivatives + inv_dij.set_size(N, 1); // inv_dij is 1.0/dij, but we only need it column-wise + alpha.set_size(N, 1); // (slightly wasting a bit of computation) + // alpha = 1.0/dij - 1.0/dsij, again column-wise + + + // initialize projs with PCA + discriminant_pca<matrix<double> > dpca; + for (int i = 0; i < mdata.nc(); ++i) + { + dpca.add_to_total_variance(colm(mdata, i)); + } + matrix<double> mat = dpca.dpca_matrix_of_size(num_dims); + projs = mat * mdata; + + // compute dsij, inv_dsij and ic + compute_relative_distances(dsij, mdata); + inv_dsij = 1.0 / dsij; + ic = 1.0 / sum(dsij); + + // compute dij and err + compute_relative_distances(dij, projs); + err = ic * sum(pointwise_multiply(squared(dij - dsij), inv_dsij)); + + // start iterating + while (num_iters--) + { + // compute dE, dE2 progressively column by column + for (int p = 0; p < N; ++p) + { + // compute + // - alpha_p, the column vector with 1/d_pj - 1/d*_pj + // - dtemp, the matrix with the p-th column repeated all along + //TODO: optimize constructions + for (int i = 0; i < N; ++i) + { + int pos = (i < p) ? p * (p - 1) / 2 + i : i * (i - 1) / 2 + p; + inv_dij(i) = (i == p) ? 0.0 : 1.0 / dij(pos); + alpha(i) = (i == p) ? 0.0 : inv_dij(i) - inv_dsij(pos); + set_colm(dtemp, i) = colm(projs, p); + } + + dtemp -= projs; + set_colm(dE, p) = dtemp * alpha; + + double sum_alpha = sum(alpha); + set_colm(dE2, p) = abs( sum_alpha + squared(dtemp) * cubed(inv_dij) ); + } + + + // compute the update projections + projs += pointwise_multiply(dE, mf * reciprocal(dE2)); + + // compute new dij and error + compute_relative_distances(dij, projs); + double err_new = ic * sum( pointwise_multiply(squared(dij - dsij), inv_dsij) ); + if (err - err_new < err_delta) + break; + err = err_new; + } + + // matrix -> std::vector<matrix> + result.clear(); + for (int i = 0; i < projs.nc(); ++i) + result.push_back(colm(projs, i)); + } + + }; + +} // namespace dlib + +#endif // DLIB_SAMMoN_Hh_ + diff --git a/ml/dlib/dlib/statistics/sammon_abstract.h b/ml/dlib/dlib/statistics/sammon_abstract.h new file mode 100644 index 000000000..0e009729c --- /dev/null +++ b/ml/dlib/dlib/statistics/sammon_abstract.h @@ -0,0 +1,117 @@ +// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SAMMoN_ABSTRACT_Hh_ +#ifdef DLIB_SAMMoN_ABSTRACT_Hh_ + +#include "../matrix/matrix_abstract.h" +#include <vector> + +namespace dlib +{ + + class sammon_projection + { + /*! + WHAT THIS OBJECT REPRESENTS + This is a function object that computes the Sammon projection of a set + of N points in a L-dimensional vector space onto a d-dimensional space + (d < L), according to the paper: + A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon + + The current implementation is a vectorized version of the original algorithm. + !*/ + + public: + + sammon_projection( + ); + /*! + ensures + - this object is properly initialized + !*/ + + template <typename matrix_type> + std::vector<matrix<double,0,1> > operator() ( + const std::vector<matrix_type>& data, + const long num_dims + ); + /*! + requires + - num_dims > 0 + - matrix_type should be a kind of dlib::matrix of doubles capable + of representing column vectors. + - for all valid i: + - is_col_vector(data[i]) == true + - data[0].size() == data[i].size() + (i.e. all the vectors in data must have the same dimensionality) + - if (data.size() != 0) then + - 0 < num_dims <= data[0].size() + (i.e. you can't project into a higher dimension than the input data, + only to a lower dimension.) + ensures + - This routine computes Sammon's dimensionality reduction method based on the + given input data. It will attempt to project the contents of data into a + num_dims dimensional space that preserves relative distances between the + input data points. + - This function returns a std::vector, OUT, such that: + - OUT == a set of column vectors that represent the Sammon projection of + the input data vectors. + - OUT.size() == data.size() + - for all valid i: + - OUT[i].size() == num_dims + - OUT[i] == the Sammon projection of the input vector data[i] + !*/ + + template <typename matrix_type> + void operator() ( + const std::vector<matrix_type>& data, + const long num_dims, + std::vector<matrix<double,0,1> >& result, + double &err, + const unsigned long num_iters = 1000, + const double err_delta = 1.0e-9 + ); + /*! + requires + - num_iters > 0 + - err_delta > 0 + - num_dims > 0 + - matrix_type should be a kind of dlib::matrix of doubles capable + of representing column vectors. + - for all valid i: + - is_col_vector(data[i]) == true + - data[0].size() == data[i].size() + (i.e. all the vectors in data must have the same dimensionality) + - if (data.size() != 0) then + - 0 < num_dims <= data[0].size() + (i.e. you can't project into a higher dimension than the input data, + only to a lower dimension.) + ensures + - This routine computes Sammon's dimensionality reduction method based on the + given input data. It will attempt to project the contents of data into a + num_dims dimensional space that preserves relative distances between the + input data points. + - #err == the final error value at the end of the algorithm. The goal of Sammon's + algorithm is to find a lower dimensional projection of the input data that + preserves the relative distances between points. The value in #err is a measure + of the total error at the end of the algorithm. So smaller values indicate + a better projection was found than if a large value is returned via #err. + - Sammon's algorithm will run until either num_iters iterations has executed + or the change in error from one iteration to the next is less than err_delta. + - Upon completion, the output of Sammon's projection is stored into #result, in + particular, we will have: + - #result == a set of column vectors that represent the Sammon projection of + the input data vectors. + - #result.size() == data.size() + - for all valid i: + - #result[i].size() == num_dims + - #result[i] == the Sammon projection of the input vector data[i] + !*/ + + }; + +} + +#endif // DLIB_SAMMoN_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/statistics/statistics.h b/ml/dlib/dlib/statistics/statistics.h new file mode 100644 index 000000000..9dee7006b --- /dev/null +++ b/ml/dlib/dlib/statistics/statistics.h @@ -0,0 +1,1890 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STATISTICs_ +#define DLIB_STATISTICs_ + +#include "statistics_abstract.h" +#include <limits> +#include <cmath> +#include "../algs.h" +#include "../matrix.h" +#include "../sparse_vector.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_stats + { + public: + + running_stats() + { + clear(); + + COMPILE_TIME_ASSERT (( + is_same_type<float,T>::value || + is_same_type<double,T>::value || + is_same_type<long double,T>::value + )); + } + + void clear() + { + sum = 0; + sum_sqr = 0; + sum_cub = 0; + sum_four = 0; + + n = 0; + min_value = std::numeric_limits<T>::infinity(); + max_value = -std::numeric_limits<T>::infinity(); + } + + void add ( + const T& val + ) + { + sum += val; + sum_sqr += val*val; + sum_cub += cubed(val); + sum_four += quaded(val); + + if (val < min_value) + min_value = val; + if (val > max_value) + max_value = val; + + ++n; + } + + T current_n ( + ) const + { + return n; + } + + T mean ( + ) const + { + if (n != 0) + return sum/n; + else + return 0; + } + + T max ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 0, + "\tT running_stats::max" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return max_value; + } + + T min ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 0, + "\tT running_stats::min" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return min_value; + } + + T variance ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_stats::variance" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/(n-1); + temp = temp*(sum_sqr - sum*sum/n); + // make sure the variance is never negative. This might + // happen due to numerical errors. + if (temp >= 0) + return temp; + else + return 0; + } + + T stddev ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_stats::stddev" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return std::sqrt(variance()); + } + + T skewness ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 2, + "\tT running_stats::skewness" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/n; + T temp1 = std::sqrt(n*(n-1))/(n-2); + temp = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/ + (std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3))); + + return temp; + } + + T ex_kurtosis ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 3, + "\tT running_stats::kurtosis" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/n; + T m4 = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp + -3*quaded(sum)*cubed(temp)); + T m2 = temp*(sum_sqr-sum*sum*temp); + temp = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3)); + + return temp; + } + + T scale ( + const T& val + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_stats::variance" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + return (val-mean())/std::sqrt(variance()); + } + + running_stats operator+ ( + const running_stats& rhs + ) const + { + running_stats temp(*this); + + temp.sum += rhs.sum; + temp.sum_sqr += rhs.sum_sqr; + temp.sum_cub += rhs.sum_cub; + temp.sum_four += rhs.sum_four; + temp.n += rhs.n; + temp.min_value = std::min(rhs.min_value, min_value); + temp.max_value = std::max(rhs.max_value, max_value); + return temp; + } + + template <typename U> + friend void serialize ( + const running_stats<U>& item, + std::ostream& out + ); + + template <typename U> + friend void deserialize ( + running_stats<U>& item, + std::istream& in + ); + + private: + T sum; + T sum_sqr; + T sum_cub; + T sum_four; + T n; + T min_value; + T max_value; + + T cubed (const T& val) const {return val*val*val; } + T quaded (const T& val) const {return val*val*val*val; } + }; + + template <typename T> + void serialize ( + const running_stats<T>& item, + std::ostream& out + ) + { + int version = 2; + serialize(version, out); + + serialize(item.sum, out); + serialize(item.sum_sqr, out); + serialize(item.sum_cub, out); + serialize(item.sum_four, out); + serialize(item.n, out); + serialize(item.min_value, out); + serialize(item.max_value, out); + } + + template <typename T> + void deserialize ( + running_stats<T>& item, + std::istream& in + ) + { + int version = 0; + deserialize(version, in); + if (version != 2) + throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object."); + + deserialize(item.sum, in); + deserialize(item.sum_sqr, in); + deserialize(item.sum_cub, in); + deserialize(item.sum_four, in); + deserialize(item.n, in); + deserialize(item.min_value, in); + deserialize(item.max_value, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_scalar_covariance + { + public: + + running_scalar_covariance() + { + clear(); + + COMPILE_TIME_ASSERT (( + is_same_type<float,T>::value || + is_same_type<double,T>::value || + is_same_type<long double,T>::value + )); + } + + void clear() + { + sum_xy = 0; + sum_x = 0; + sum_y = 0; + sum_xx = 0; + sum_yy = 0; + n = 0; + } + + void add ( + const T& x, + const T& y + ) + { + sum_xy += x*y; + + sum_xx += x*x; + sum_yy += y*y; + + sum_x += x; + sum_y += y; + + n += 1; + } + + T current_n ( + ) const + { + return n; + } + + T mean_x ( + ) const + { + if (n != 0) + return sum_x/n; + else + return 0; + } + + T mean_y ( + ) const + { + if (n != 0) + return sum_y/n; + else + return 0; + } + + T covariance ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance::covariance()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return 1/(n-1) * (sum_xy - sum_y*sum_x/n); + } + + T correlation ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance::correlation()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return covariance() / std::sqrt(variance_x()*variance_y()); + } + + T variance_x ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance::variance_x()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); + // make sure the variance is never negative. This might + // happen due to numerical errors. + if (temp >= 0) + return temp; + else + return 0; + } + + T variance_y ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance::variance_y()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n); + // make sure the variance is never negative. This might + // happen due to numerical errors. + if (temp >= 0) + return temp; + else + return 0; + } + + T stddev_x ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance::stddev_x()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return std::sqrt(variance_x()); + } + + T stddev_y ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance::stddev_y()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return std::sqrt(variance_y()); + } + + running_scalar_covariance operator+ ( + const running_scalar_covariance& rhs + ) const + { + running_scalar_covariance temp(rhs); + + temp.sum_xy += sum_xy; + temp.sum_x += sum_x; + temp.sum_y += sum_y; + temp.sum_xx += sum_xx; + temp.sum_yy += sum_yy; + temp.n += n; + return temp; + } + + private: + + T sum_xy; + T sum_x; + T sum_y; + T sum_xx; + T sum_yy; + T n; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_scalar_covariance_decayed + { + public: + + explicit running_scalar_covariance_decayed( + T decay_halflife = 1000 + ) + { + DLIB_ASSERT(decay_halflife > 0); + + sum_xy = 0; + sum_x = 0; + sum_y = 0; + sum_xx = 0; + sum_yy = 0; + forget = std::pow(0.5, 1/decay_halflife); + n = 0; + + COMPILE_TIME_ASSERT (( + is_same_type<float,T>::value || + is_same_type<double,T>::value || + is_same_type<long double,T>::value + )); + } + + T forget_factor ( + ) const + { + return forget; + } + + void add ( + const T& x, + const T& y + ) + { + sum_xy = sum_xy*forget + x*y; + + sum_xx = sum_xx*forget + x*x; + sum_yy = sum_yy*forget + y*y; + + sum_x = sum_x*forget + x; + sum_y = sum_y*forget + y; + + n = n*forget + 1; + } + + T current_n ( + ) const + { + return n; + } + + T mean_x ( + ) const + { + if (n != 0) + return sum_x/n; + else + return 0; + } + + T mean_y ( + ) const + { + if (n != 0) + return sum_y/n; + else + return 0; + } + + T covariance ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance_decayed::covariance()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return 1/(n-1) * (sum_xy - sum_y*sum_x/n); + } + + T correlation ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance_decayed::correlation()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = std::sqrt(variance_x()*variance_y()); + if (temp != 0) + return covariance() / temp; + else + return 0; // just say it's zero if there isn't any variance in x or y. + } + + T variance_x ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance_decayed::variance_x()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); + // make sure the variance is never negative. This might + // happen due to numerical errors. + if (temp >= 0) + return temp; + else + return 0; + } + + T variance_y ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance_decayed::variance_y()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n); + // make sure the variance is never negative. This might + // happen due to numerical errors. + if (temp >= 0) + return temp; + else + return 0; + } + + T stddev_x ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance_decayed::stddev_x()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return std::sqrt(variance_x()); + } + + T stddev_y ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_scalar_covariance_decayed::stddev_y()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return std::sqrt(variance_y()); + } + + private: + + T sum_xy; + T sum_x; + T sum_y; + T sum_xx; + T sum_yy; + T n; + T forget; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_stats_decayed + { + public: + + explicit running_stats_decayed( + T decay_halflife = 1000 + ) + { + DLIB_ASSERT(decay_halflife > 0); + + sum_x = 0; + sum_xx = 0; + forget = std::pow(0.5, 1/decay_halflife); + n = 0; + + COMPILE_TIME_ASSERT (( + is_same_type<float,T>::value || + is_same_type<double,T>::value || + is_same_type<long double,T>::value + )); + } + + T forget_factor ( + ) const + { + return forget; + } + + void add ( + const T& x + ) + { + + sum_xx = sum_xx*forget + x*x; + + sum_x = sum_x*forget + x; + + n = n*forget + 1; + } + + T current_n ( + ) const + { + return n; + } + + T mean ( + ) const + { + if (n != 0) + return sum_x/n; + else + return 0; + } + + T variance ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_stats_decayed::variance()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); + // make sure the variance is never negative. This might + // happen due to numerical errors. + if (temp >= 0) + return temp; + else + return 0; + } + + T stddev ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(current_n() > 1, + "\tT running_stats_decayed::stddev()" + << "\n\tyou have to add some numbers to this object first" + << "\n\tthis: " << this + ); + + return std::sqrt(variance()); + } + + template <typename U> + friend void serialize ( + const running_stats_decayed<U>& item, + std::ostream& out + ); + + template <typename U> + friend void deserialize ( + running_stats_decayed<U>& item, + std::istream& in + ); + + private: + + T sum_x; + T sum_xx; + T n; + T forget; + }; + + template <typename T> + void serialize ( + const running_stats_decayed<T>& item, + std::ostream& out + ) + { + int version = 1; + serialize(version, out); + + serialize(item.sum_x, out); + serialize(item.sum_xx, out); + serialize(item.n, out); + serialize(item.forget, out); + } + + template <typename T> + void deserialize ( + running_stats_decayed<T>& item, + std::istream& in + ) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats_decayed object."); + + deserialize(item.sum_x, in); + deserialize(item.sum_xx, in); + deserialize(item.n, in); + deserialize(item.forget, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double mean_sign_agreement ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(a.size() == b.size(), + "\t double mean_sign_agreement(a,b)" + << "\n\t a and b must be the same length." + << "\n\t a.size(): " << a.size() + << "\n\t b.size(): " << b.size() + ); + + + double temp = 0; + for (unsigned long i = 0; i < a.size(); ++i) + { + if ((a[i] >= 0 && b[i] >= 0) || + (a[i] < 0 && b[i] < 0)) + { + temp += 1; + } + } + + return temp/a.size(); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double correlation ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(a.size() == b.size() && a.size() > 1, + "\t double correlation(a,b)" + << "\n\t a and b must be the same length and have more than one element." + << "\n\t a.size(): " << a.size() + << "\n\t b.size(): " << b.size() + ); + + running_scalar_covariance<double> rs; + for (unsigned long i = 0; i < a.size(); ++i) + { + rs.add(a[i], b[i]); + } + return rs.correlation(); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double covariance ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(a.size() == b.size() && a.size() > 1, + "\t double covariance(a,b)" + << "\n\t a and b must be the same length and have more than one element." + << "\n\t a.size(): " << a.size() + << "\n\t b.size(): " << b.size() + ); + + running_scalar_covariance<double> rs; + for (unsigned long i = 0; i < a.size(); ++i) + { + rs.add(a[i], b[i]); + } + return rs.covariance(); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double r_squared ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(a.size() == b.size() && a.size() > 1, + "\t double r_squared(a,b)" + << "\n\t a and b must be the same length and have more than one element." + << "\n\t a.size(): " << a.size() + << "\n\t b.size(): " << b.size() + ); + + return std::pow(correlation(a,b),2.0); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double mean_squared_error ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(a.size() == b.size(), + "\t double mean_squared_error(a,b)" + << "\n\t a and b must be the same length." + << "\n\t a.size(): " << a.size() + << "\n\t b.size(): " << b.size() + ); + + return mean(squared(matrix_cast<double>(mat(a))-matrix_cast<double>(mat(b)))); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class running_covariance + { + /*! + INITIAL VALUE + - vect_size == 0 + - total_count == 0 + + CONVENTION + - vect_size == in_vector_size() + - total_count == current_n() + + - if (total_count != 0) + - total_sum == the sum of all vectors given to add() + - the covariance of all the elements given to add() is given + by: + - let avg == total_sum/total_count + - covariance == total_cov/total_count - avg*trans(avg) + !*/ + public: + + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + running_covariance( + ) + { + clear(); + } + + void clear( + ) + { + total_count = 0; + + vect_size = 0; + + total_sum.set_size(0); + total_cov.set_size(0,0); + } + + long in_vector_size ( + ) const + { + return vect_size; + } + + long current_n ( + ) const + { + return static_cast<long>(total_count); + } + + void set_dimension ( + long size + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( size > 0, + "\t void running_covariance::set_dimension()" + << "\n\t Invalid inputs were given to this function" + << "\n\t size: " << size + << "\n\t this: " << this + ); + + clear(); + vect_size = size; + total_sum.set_size(size); + total_cov.set_size(size,size); + total_sum = 0; + total_cov = 0; + } + + template <typename T> + typename disable_if<is_matrix<T> >::type add ( + const T& val + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(((long)max_index_plus_one(val) <= in_vector_size() && in_vector_size() > 0), + "\t void running_covariance::add()" + << "\n\t Invalid inputs were given to this function" + << "\n\t max_index_plus_one(val): " << max_index_plus_one(val) + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t this: " << this + ); + + for (typename T::const_iterator i = val.begin(); i != val.end(); ++i) + { + total_sum(i->first) += i->second; + for (typename T::const_iterator j = val.begin(); j != val.end(); ++j) + { + total_cov(i->first, j->first) += i->second*j->second; + } + } + + ++total_count; + } + + template <typename T> + typename enable_if<is_matrix<T> >::type add ( + const T& val + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(val) && (in_vector_size() == 0 || val.size() == in_vector_size()), + "\t void running_covariance::add()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_col_vector(val): " << is_col_vector(val) + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t val.size(): " << val.size() + << "\n\t this: " << this + ); + + vect_size = val.size(); + if (total_count == 0) + { + total_cov = val*trans(val); + total_sum = val; + } + else + { + total_cov += val*trans(val); + total_sum += val; + } + ++total_count; + } + + const column_matrix mean ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( in_vector_size() != 0, + "\t running_covariance::mean()" + << "\n\t This object can not execute this function in its current state." + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t current_n(): " << current_n() + << "\n\t this: " << this + ); + + return total_sum/total_count; + } + + const general_matrix covariance ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( in_vector_size() != 0 && current_n() > 1, + "\t running_covariance::covariance()" + << "\n\t This object can not execute this function in its current state." + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t current_n(): " << current_n() + << "\n\t this: " << this + ); + + return (total_cov - total_sum*trans(total_sum)/total_count)/(total_count-1); + } + + const running_covariance operator+ ( + const running_covariance& item + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()), + "\t running_covariance running_covariance::operator+()" + << "\n\t The two running_covariance objects being added must have compatible parameters" + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t item.in_vector_size(): " << item.in_vector_size() + << "\n\t this: " << this + ); + + running_covariance temp(item); + + // make sure we ignore empty matrices + if (total_count != 0 && temp.total_count != 0) + { + temp.total_cov += total_cov; + temp.total_sum += total_sum; + temp.total_count += total_count; + } + else if (total_count != 0) + { + temp.total_cov = total_cov; + temp.total_sum = total_sum; + temp.total_count = total_count; + } + + return temp; + } + + + private: + + general_matrix total_cov; + column_matrix total_sum; + scalar_type total_count; + + long vect_size; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class running_cross_covariance + { + /*! + INITIAL VALUE + - x_vect_size == 0 + - y_vect_size == 0 + - total_count == 0 + + CONVENTION + - x_vect_size == x_vector_size() + - y_vect_size == y_vector_size() + - total_count == current_n() + + - if (total_count != 0) + - sum_x == the sum of all x vectors given to add() + - sum_y == the sum of all y vectors given to add() + - total_cov == sum of all x*trans(y) given to add() + !*/ + + public: + + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + running_cross_covariance( + ) + { + clear(); + } + + void clear( + ) + { + total_count = 0; + + x_vect_size = 0; + y_vect_size = 0; + + sum_x.set_size(0); + sum_y.set_size(0); + total_cov.set_size(0,0); + } + + long x_vector_size ( + ) const + { + return x_vect_size; + } + + long y_vector_size ( + ) const + { + return y_vect_size; + } + + void set_dimensions ( + long x_size, + long y_size + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( x_size > 0 && y_size > 0, + "\t void running_cross_covariance::set_dimensions()" + << "\n\t Invalid inputs were given to this function" + << "\n\t x_size: " << x_size + << "\n\t y_size: " << y_size + << "\n\t this: " << this + ); + + clear(); + x_vect_size = x_size; + y_vect_size = y_size; + sum_x.set_size(x_size); + sum_y.set_size(y_size); + total_cov.set_size(x_size,y_size); + + sum_x = 0; + sum_y = 0; + total_cov = 0; + } + + long current_n ( + ) const + { + return static_cast<long>(total_count); + } + + template <typename T, typename U> + typename enable_if_c<!is_matrix<T>::value && !is_matrix<U>::value>::type add ( + const T& x, + const U& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) && + ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) , + "\t void running_cross_covariance::add()" + << "\n\t Invalid inputs were given to this function" + << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) + << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) + << "\n\t x_vector_size(): " << x_vector_size() + << "\n\t y_vector_size(): " << y_vector_size() + << "\n\t this: " << this + ); + + for (typename T::const_iterator i = x.begin(); i != x.end(); ++i) + { + sum_x(i->first) += i->second; + for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) + { + total_cov(i->first, j->first) += i->second*j->second; + } + } + + // do sum_y += y + for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) + { + sum_y(j->first) += j->second; + } + + ++total_count; + } + + template <typename T, typename U> + typename enable_if_c<is_matrix<T>::value && !is_matrix<U>::value>::type add ( + const T& x, + const U& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( (is_col_vector(x) && x.size() == x_vector_size() && x_vector_size() > 0) && + ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) , + "\t void running_cross_covariance::add()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t x.size(): " << x.size() + << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) + << "\n\t x_vector_size(): " << x_vector_size() + << "\n\t y_vector_size(): " << y_vector_size() + << "\n\t this: " << this + ); + + sum_x += x; + + for (long i = 0; i < x.size(); ++i) + { + for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) + { + total_cov(i, j->first) += x(i)*j->second; + } + } + + // do sum_y += y + for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) + { + sum_y(j->first) += j->second; + } + + ++total_count; + } + + template <typename T, typename U> + typename enable_if_c<!is_matrix<T>::value && is_matrix<U>::value>::type add ( + const T& x, + const U& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) && + (is_col_vector(y) && y.size() == (long)y_vector_size() && y_vector_size() > 0) , + "\t void running_cross_covariance::add()" + << "\n\t Invalid inputs were given to this function" + << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) + << "\n\t is_col_vector(y): " << is_col_vector(y) + << "\n\t y.size(): " << y.size() + << "\n\t x_vector_size(): " << x_vector_size() + << "\n\t y_vector_size(): " << y_vector_size() + << "\n\t this: " << this + ); + + for (typename T::const_iterator i = x.begin(); i != x.end(); ++i) + { + sum_x(i->first) += i->second; + for (long j = 0; j < y.size(); ++j) + { + total_cov(i->first, j) += i->second*y(j); + } + } + + sum_y += y; + + ++total_count; + } + + template <typename T, typename U> + typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value>::type add ( + const T& x, + const U& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(x) && (x_vector_size() == 0 || x.size() == x_vector_size()) && + is_col_vector(y) && (y_vector_size() == 0 || y.size() == y_vector_size()) && + x.size() != 0 && + y.size() != 0, + "\t void running_cross_covariance::add()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t x_vector_size(): " << x_vector_size() + << "\n\t x.size(): " << x.size() + << "\n\t is_col_vector(y): " << is_col_vector(y) + << "\n\t y_vector_size(): " << y_vector_size() + << "\n\t y.size(): " << y.size() + << "\n\t this: " << this + ); + + x_vect_size = x.size(); + y_vect_size = y.size(); + if (total_count == 0) + { + total_cov = x*trans(y); + sum_x = x; + sum_y = y; + } + else + { + total_cov += x*trans(y); + sum_x += x; + sum_y += y; + } + ++total_count; + } + + const column_matrix mean_x ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( current_n() != 0, + "\t running_cross_covariance::mean()" + << "\n\t This object can not execute this function in its current state." + << "\n\t current_n(): " << current_n() + << "\n\t this: " << this + ); + + return sum_x/total_count; + } + + const column_matrix mean_y ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( current_n() != 0, + "\t running_cross_covariance::mean()" + << "\n\t This object can not execute this function in its current state." + << "\n\t current_n(): " << current_n() + << "\n\t this: " << this + ); + + return sum_y/total_count; + } + + const general_matrix covariance_xy ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( current_n() > 1, + "\t running_cross_covariance::covariance()" + << "\n\t This object can not execute this function in its current state." + << "\n\t x_vector_size(): " << x_vector_size() + << "\n\t y_vector_size(): " << y_vector_size() + << "\n\t current_n(): " << current_n() + << "\n\t this: " << this + ); + + return (total_cov - sum_x*trans(sum_y)/total_count)/(total_count-1); + } + + const running_cross_covariance operator+ ( + const running_cross_covariance& item + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT((x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size()) && + (y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size()), + "\t running_cross_covariance running_cross_covariance::operator+()" + << "\n\t The two running_cross_covariance objects being added must have compatible parameters" + << "\n\t x_vector_size(): " << x_vector_size() + << "\n\t item.x_vector_size(): " << item.x_vector_size() + << "\n\t y_vector_size(): " << y_vector_size() + << "\n\t item.y_vector_size(): " << item.y_vector_size() + << "\n\t this: " << this + ); + + running_cross_covariance temp(item); + + // make sure we ignore empty matrices + if (total_count != 0 && temp.total_count != 0) + { + temp.total_cov += total_cov; + temp.sum_x += sum_x; + temp.sum_y += sum_y; + temp.total_count += total_count; + } + else if (total_count != 0) + { + temp.total_cov = total_cov; + temp.sum_x = sum_x; + temp.sum_y = sum_y; + temp.total_count = total_count; + } + + return temp; + } + + + private: + + general_matrix total_cov; + column_matrix sum_x; + column_matrix sum_y; + scalar_type total_count; + + long x_vect_size; + long y_vect_size; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class vector_normalizer + { + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef matrix_type result_type; + + template <typename vector_type> + void train ( + const vector_type& samples + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(samples.size() > 0, + "\tvoid vector_normalizer::train()" + << "\n\tyou have to give a nonempty set of samples to this function" + << "\n\tthis: " << this + ); + + m = mean(mat(samples)); + sd = reciprocal(sqrt(variance(mat(samples)))); + + DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer::train() have infinite or NaN values"); + } + + long in_vector_size ( + ) const + { + return m.nr(); + } + + long out_vector_size ( + ) const + { + return m.nr(); + } + + const matrix_type& means ( + ) const + { + return m; + } + + const matrix_type& std_devs ( + ) const + { + return sd; + } + + const result_type& operator() ( + const matrix_type& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1, + "\tmatrix vector_normalizer::operator()" + << "\n\t you have given invalid arguments to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t x.nc(): " << x.nc() + << "\n\t this: " << this + ); + + temp_out = pointwise_multiply(x-m, sd); + return temp_out; + } + + void swap ( + vector_normalizer& item + ) + { + m.swap(item.m); + sd.swap(item.sd); + temp_out.swap(item.temp_out); + } + + template <typename mt> + friend void deserialize ( + vector_normalizer<mt>& item, + std::istream& in + ); + + template <typename mt> + friend void serialize ( + const vector_normalizer<mt>& item, + std::ostream& out + ); + + private: + + // ------------------- private data members ------------------- + + matrix_type m, sd; + + // This is just a temporary variable that doesn't contribute to the + // state of this object. + mutable matrix_type temp_out; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + inline void swap ( + vector_normalizer<matrix_type>& a, + vector_normalizer<matrix_type>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void deserialize ( + vector_normalizer<matrix_type>& item, + std::istream& in + ) + { + deserialize(item.m, in); + deserialize(item.sd, in); + // Keep deserializing the pca matrix for backwards compatibility. + matrix<double> pca; + deserialize(pca, in); + + if (pca.size() != 0) + throw serialization_error("Error deserializing object of type vector_normalizer\n" + "It looks like a serialized vector_normalizer_pca was accidentally deserialized into \n" + "a vector_normalizer object."); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void serialize ( + const vector_normalizer<matrix_type>& item, + std::ostream& out + ) + { + serialize(item.m, out); + serialize(item.sd, out); + // Keep serializing the pca matrix for backwards compatibility. + matrix<double> pca; + serialize(pca, out); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class vector_normalizer_pca + { + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef matrix<scalar_type,0,1,mem_manager_type> result_type; + + template <typename vector_type> + void train ( + const vector_type& samples, + const double eps = 0.99 + ) + { + // You are getting an error here because you are trying to apply PCA + // to a vector of fixed length. But PCA is going to try and do + // dimensionality reduction so you can't use a vector with a fixed dimension. + COMPILE_TIME_ASSERT(matrix_type::NR == 0); + + // make sure requires clause is not broken + DLIB_ASSERT(samples.size() > 0, + "\tvoid vector_normalizer_pca::train_pca()" + << "\n\tyou have to give a nonempty set of samples to this function" + << "\n\tthis: " << this + ); + DLIB_ASSERT(0 < eps && eps <= 1, + "\tvoid vector_normalizer_pca::train_pca()" + << "\n\tyou have to give a nonempty set of samples to this function" + << "\n\tthis: " << this + ); + train_pca_impl(mat(samples),eps); + + DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_pca::train() have infinite or NaN values"); + } + + long in_vector_size ( + ) const + { + return m.nr(); + } + + long out_vector_size ( + ) const + { + return pca.nr(); + } + + const matrix<scalar_type,0,1,mem_manager_type>& means ( + ) const + { + return m; + } + + const matrix<scalar_type,0,1,mem_manager_type>& std_devs ( + ) const + { + return sd; + } + + const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix ( + ) const + { + return pca; + } + + const result_type& operator() ( + const matrix_type& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1, + "\tmatrix vector_normalizer_pca::operator()" + << "\n\t you have given invalid arguments to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t x.nc(): " << x.nc() + << "\n\t this: " << this + ); + + // If we have a pca transform matrix on hand then + // also apply that. + temp_out = pca*pointwise_multiply(x-m, sd); + + return temp_out; + } + + void swap ( + vector_normalizer_pca& item + ) + { + m.swap(item.m); + sd.swap(item.sd); + pca.swap(item.pca); + temp_out.swap(item.temp_out); + } + + template <typename T> + friend void deserialize ( + vector_normalizer_pca<T>& item, + std::istream& in + ); + + template <typename T> + friend void serialize ( + const vector_normalizer_pca<T>& item, + std::ostream& out + ); + + private: + + template <typename mat_type> + void train_pca_impl ( + const mat_type& samples, + const double eps + ) + { + m = mean(samples); + sd = reciprocal(sqrt(variance(samples))); + + // fill x with the normalized version of the input samples + matrix<typename mat_type::type,0,1,mem_manager_type> x(samples); + for (long r = 0; r < x.size(); ++r) + x(r) = pointwise_multiply(x(r)-m, sd); + + matrix<scalar_type,0,0,mem_manager_type> temp, eigen; + matrix<scalar_type,0,1,mem_manager_type> eigenvalues; + + // Compute the svd of the covariance matrix of the normalized inputs + svd(covariance(x), temp, eigen, pca); + eigenvalues = diag(eigen); + + rsort_columns(pca, eigenvalues); + + // figure out how many eigenvectors we want in our pca matrix + const double thresh = sum(eigenvalues)*eps; + long num_vectors = 0; + double total = 0; + for (long r = 0; r < eigenvalues.size() && total < thresh; ++r) + { + ++num_vectors; + total += eigenvalues(r); + } + + // So now we know we want to use num_vectors of the first eigenvectors. So + // pull those out and discard the rest. + pca = trans(colm(pca,range(0,num_vectors-1))); + + // Apply the pca transform to the data in x. Then we will normalize the + // pca matrix below. + for (long r = 0; r < x.nr(); ++r) + { + x(r) = pca*x(r); + } + + // Here we just scale the output features from the pca transform so + // that the variance of each feature is 1. So this doesn't really change + // what the pca is doing, it just makes sure the output features are + // normalized. + pca = trans(scale_columns(trans(pca), reciprocal(sqrt(variance(x))))); + } + + + // ------------------- private data members ------------------- + + matrix<scalar_type,0,1,mem_manager_type> m, sd; + matrix<scalar_type,0,0,mem_manager_type> pca; + + // This is just a temporary variable that doesn't contribute to the + // state of this object. + mutable result_type temp_out; + }; + + template < + typename matrix_type + > + inline void swap ( + vector_normalizer_pca<matrix_type>& a, + vector_normalizer_pca<matrix_type>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void deserialize ( + vector_normalizer_pca<matrix_type>& item, + std::istream& in + ) + { + deserialize(item.m, in); + deserialize(item.sd, in); + deserialize(item.pca, in); + if (item.pca.nc() != item.m.nr()) + throw serialization_error("Error deserializing object of type vector_normalizer_pca\n" + "It looks like a serialized vector_normalizer was accidentally deserialized into \n" + "a vector_normalizer_pca object."); + } + + template < + typename matrix_type + > + void serialize ( + const vector_normalizer_pca<matrix_type>& item, + std::ostream& out + ) + { + serialize(item.m, out); + serialize(item.sd, out); + serialize(item.pca, out); + } + +// ---------------------------------------------------------------------------------------- + + inline double binomial_random_vars_are_different ( + uint64_t k1, + uint64_t n1, + uint64_t k2, + uint64_t n2 + ) + { + DLIB_ASSERT(k1 <= n1, "k1: "<< k1 << " n1: "<< n1); + DLIB_ASSERT(k2 <= n2, "k2: "<< k2 << " n2: "<< n2); + + const double p1 = k1/(double)n1; + const double p2 = k2/(double)n2; + const double p = (k1+k2)/(double)(n1+n2); + + auto ll = [](double p, uint64_t k, uint64_t n) { + if (p == 0 || p == 1) + return 0.0; + return k*std::log(p) + (n-k)*std::log(1-p); + }; + + auto logll = ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); + + // The basic statistic only tells you if the random variables are different. But + // it's nice to know which way they are different, i.e., which one is bigger. So + // stuff that information into the sign bit of the return value. + if (p1>=p2) + return logll; + else + return -logll; + } + +// ---------------------------------------------------------------------------------------- + + inline double event_correlation ( + uint64_t A_count, + uint64_t B_count, + uint64_t AB_count, + uint64_t total_num_observations + ) + { + DLIB_ASSERT(AB_count <= A_count && A_count <= total_num_observations, + "AB_count: " << AB_count << ", A_count: "<< A_count << ", total_num_observations: " << total_num_observations); + DLIB_ASSERT(AB_count <= B_count && B_count <= total_num_observations, + "AB_count: " << AB_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations); + + if (total_num_observations == 0) + return 0; + + DLIB_ASSERT(A_count + B_count - AB_count <= total_num_observations, + "AB_count: " << AB_count << " A_count: " << A_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations); + + + const auto AnotB_count = A_count - AB_count; + const auto notB_count = total_num_observations - B_count; + // How likely is it that the odds of A happening is different when conditioned on + // whether or not B happened? + return binomial_random_vars_are_different( + AB_count, B_count, // A conditional on the presence of B + AnotB_count, notB_count // A conditional on the absence of B + ); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STATISTICs_ + diff --git a/ml/dlib/dlib/statistics/statistics_abstract.h b/ml/dlib/dlib/statistics/statistics_abstract.h new file mode 100644 index 000000000..ef8f13802 --- /dev/null +++ b/ml/dlib/dlib/statistics/statistics_abstract.h @@ -0,0 +1,1387 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STATISTICs_ABSTRACT_ +#ifdef DLIB_STATISTICs_ABSTRACT_ + +#include <limits> +#include <cmath> +#include "../matrix/matrix_abstract.h" +#include "../svm/sparse_vector_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double mean_sign_agreement ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ); + /*! + requires + - a.size() == b.size() + ensures + - returns the number of times a[i] has the same sign as b[i] divided by + a.size(). So we return the probability that elements of a and b have + the same sign. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double correlation ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ); + /*! + requires + - a.size() == b.size() + - a.size() > 1 + ensures + - returns the correlation coefficient between all the elements of a and b. + (i.e. how correlated is a(i) with b(i)) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double covariance ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ); + /*! + requires + - a.size() == b.size() + - a.size() > 1 + ensures + - returns the covariance between all the elements of a and b. + (i.e. how does a(i) vary with b(i)) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double r_squared ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ); + /*! + requires + - a.size() == b.size() + - a.size() > 1 + ensures + - returns the R^2 coefficient of determination between all the elements of a and b. + This value is just the square of correlation(a,b). + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename alloc + > + double mean_squared_error ( + const std::vector<T,alloc>& a, + const std::vector<T,alloc>& b + ); + /*! + requires + - a.size() == b.size() + ensures + - returns the mean squared error between all the elements of a and b. + (i.e. mean(squared(mat(a)-mat(b)))) + !*/ + +// ---------------------------------------------------------------------------------------- + + double binomial_random_vars_are_different ( + uint64_t k1, + uint64_t n1, + uint64_t k2, + uint64_t n2 + ); + /*! + requires + - k1 <= n1 + - k2 <= n2 + ensures + - Given two binomially distributed random variables, X1 and X2, we want to know + if these variables have the same parameter (i.e. the chance of "success"). + So assume that: + - You observed X1 to give k1 successes out of n1 trials. + - You observed X2 to give k2 successes out of n2 trials. + - This function performs a simple likelihood ratio test to determine if X1 and + X2 have the same parameter. The return value of this function will be: + - Close to 0 if they are probably the same. + - Larger than 0 if X1 probably has a higher "success" rate than X2. + - Smaller than 0 if X2 probably has a higher "success" rate than X1. + Moreover, the larger the absolute magnitude of the return value the more + likely it is that X1 and X2 have different distributions. + - For a discussion of the technique and applications see: + Dunning, Ted. "Accurate methods for the statistics of surprise and + coincidence." Computational linguistics 19.1 (1993): 61-74. + !*/ + +// ---------------------------------------------------------------------------------------- + + double event_correlation ( + uint64_t A_count, + uint64_t B_count, + uint64_t AB_count, + uint64_t total_num_observations + ); + /*! + requires + - AB_count <= A_count <= total_num_observations + - AB_count <= B_count <= total_num_observations + - A_count + B_count - AB_count <= total_num_observations + ensures + - This function does a statistical test to determine if two events co-occur in + a statistically significant way. In particular, we assume you performed + total_num_observations measurements and during those measurements you: + - Observed event A to happen A_count times. + - Observed event B to happen B_count times. + - Observed AB_count co-occurrences of the events. That is, AB_count is the + number of times the events happened together during the same measurement. + - This function returns a number, COR, which can take any real value. It has + the following interpretations: + - COR == 0: there is no evidence of correlation between the two events. + They appear to be unrelated. + - COR > 0: There is evidence that A and B co-occur together. That is, + they happen at the same times more often than you would expect if they + were independent events. The larger the magnitude of COR the more + evidence we have for the correlation. + - COR < 0: There is evidence that A and B are anti-correlated. That is, + when A happens B is unlikely to happen and vise versa. The larger the + magnitude of COR the more evidence we have for the anti-correlation. + - This function implements the simple likelihood ratio test discussed in the + following paper: + Dunning, Ted. "Accurate methods for the statistics of surprise and + coincidence." Computational linguistics 19.1 (1993): 61-74. + So for an extended discussion of the method see the above paper. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_stats + { + /*! + REQUIREMENTS ON T + - T must be a float, double, or long double type + + INITIAL VALUE + - mean() == 0 + - current_n() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents something that can compute the running mean, + variance, skewness, and excess kurtosis of a stream of real numbers. + !*/ + public: + + running_stats( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void clear( + ); + /*! + ensures + - this object has its initial value + - clears all memory of any previous data points + !*/ + + T current_n ( + ) const; + /*! + ensures + - returns the number of points given to this object so far. + !*/ + + void add ( + const T& val + ); + /*! + ensures + - updates the mean, variance, skewness, and kurtosis stored in this object + so that the new value is factored into them. + - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1). + (i.e. the updated mean value that takes the new value into account) + - #variance() == the updated variance that takes this new value into account. + - #skewness() == the updated skewness that takes this new value into account. + - #ex_kurtosis() == the updated kurtosis that takes this new value into account. + - #current_n() == current_n() + 1 + !*/ + + T mean ( + ) const; + /*! + ensures + - returns the mean of all the values presented to this object + so far. + !*/ + + T variance ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sample variance of all the values presented to this + object so far. + !*/ + + T stddev ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sampled standard deviation of all the values + presented to this object so far. + !*/ + + T skewness ( + ) const; + /*! + requires + - current_n() > 2 + ensures + - returns the unbiased sample skewness of all the values presented + to this object so far. + !*/ + + T ex_kurtosis( + ) const; + /*! + requires + - current_n() > 3 + ensures + - returns the unbiased sample kurtosis of all the values presented + to this object so far. + !*/ + + T max ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the largest value presented to this object so far. + !*/ + + T min ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the smallest value presented to this object so far. + !*/ + + T scale ( + const T& val + ) const; + /*! + requires + - current_n() > 1 + ensures + - return (val-mean())/stddev(); + !*/ + + running_stats operator+ ( + const running_stats& rhs + ) const; + /*! + ensures + - returns a new running_stats object that represents the combination of all + the values given to *this and rhs. That is, this function returns a + running_stats object, R, that is equivalent to what you would obtain if + all calls to this->add() and rhs.add() had instead been done to R. + !*/ + }; + + template <typename T> + void serialize ( + const running_stats<T>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + template <typename T> + void deserialize ( + running_stats<T>& item, + std::istream& in + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_scalar_covariance + { + /*! + REQUIREMENTS ON T + - T must be a float, double, or long double type + + INITIAL VALUE + - mean_x() == 0 + - mean_y() == 0 + - current_n() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents something that can compute the running covariance + of a stream of real number pairs. + !*/ + + public: + + running_scalar_covariance( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void clear( + ); + /*! + ensures + - this object has its initial value + - clears all memory of any previous data points + !*/ + + void add ( + const T& x, + const T& y + ); + /*! + ensures + - updates the statistics stored in this object so that + the new pair (x,y) is factored into them. + - #current_n() == current_n() + 1 + !*/ + + T current_n ( + ) const; + /*! + ensures + - returns the number of points given to this object so far. + !*/ + + T mean_x ( + ) const; + /*! + ensures + - returns the mean value of all x samples presented to this object + via add(). + !*/ + + T mean_y ( + ) const; + /*! + ensures + - returns the mean value of all y samples presented to this object + via add(). + !*/ + + T covariance ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the covariance between all the x and y samples presented + to this object via add() + !*/ + + T correlation ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the correlation coefficient between all the x and y samples + presented to this object via add() + !*/ + + T variance_x ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sample variance value of all x samples presented + to this object via add(). + !*/ + + T variance_y ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sample variance value of all y samples presented + to this object via add(). + !*/ + + T stddev_x ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sample standard deviation of all x samples + presented to this object via add(). + !*/ + + T stddev_y ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sample standard deviation of all y samples + presented to this object via add(). + !*/ + + running_scalar_covariance operator+ ( + const running_covariance& rhs + ) const; + /*! + ensures + - returns a new running_scalar_covariance object that represents the + combination of all the values given to *this and rhs. That is, this + function returns a running_scalar_covariance object, R, that is + equivalent to what you would obtain if all calls to this->add() and + rhs.add() had instead been done to R. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_scalar_covariance_decayed + { + /*! + REQUIREMENTS ON T + - T must be a float, double, or long double type + + INITIAL VALUE + - mean_x() == 0 + - mean_y() == 0 + - current_n() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents something that can compute the running covariance of + a stream of real number pairs. It is essentially the same as + running_scalar_covariance except that it forgets about data it has seen + after a certain period of time. It does this by exponentially decaying old + statistics. + !*/ + + public: + + running_scalar_covariance_decayed( + T decay_halflife = 1000 + ); + /*! + requires + - decay_halflife > 0 + ensures + - #forget_factor() == std::pow(0.5, 1/decay_halflife); + (i.e. after decay_halflife calls to add() the data given to the first add + will be down weighted by 0.5 in the statistics stored in this object). + !*/ + + T forget_factor ( + ) const; + /*! + ensures + - returns the exponential forget factor used to forget old statistics when + add() is called. + !*/ + + void add ( + const T& x, + const T& y + ); + /*! + ensures + - updates the statistics stored in this object so that + the new pair (x,y) is factored into them. + - #current_n() == current_n()*forget_factor() + forget_factor() + - Down weights old statistics by a factor of forget_factor(). + !*/ + + T current_n ( + ) const; + /*! + ensures + - returns the effective number of points given to this object. As add() + is called this value will converge to a constant, the value of which is + based on the decay_halflife supplied to the constructor. + !*/ + + T mean_x ( + ) const; + /*! + ensures + - returns the mean value of all x samples presented to this object + via add(). + !*/ + + T mean_y ( + ) const; + /*! + ensures + - returns the mean value of all y samples presented to this object + via add(). + !*/ + + T covariance ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the covariance between all the x and y samples presented + to this object via add() + !*/ + + T correlation ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the correlation coefficient between all the x and y samples + presented to this object via add() + !*/ + + T variance_x ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the sample variance value of all x samples presented + to this object via add(). + !*/ + + T variance_y ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the sample variance value of all y samples presented + to this object via add(). + !*/ + + T stddev_x ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the sample standard deviation of all x samples + presented to this object via add(). + !*/ + + T stddev_y ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the sample standard deviation of all y samples + presented to this object via add(). + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + class running_stats_decayed + { + /*! + REQUIREMENTS ON T + - T must be a float, double, or long double type + + INITIAL VALUE + - mean() == 0 + - current_n() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents something that can compute the running mean and + variance of a stream of real numbers. It is similar to running_stats + except that it forgets about data it has seen after a certain period of + time. It does this by exponentially decaying old statistics. + !*/ + + public: + + running_stats_decayed( + T decay_halflife = 1000 + ); + /*! + requires + - decay_halflife > 0 + ensures + - #forget_factor() == std::pow(0.5, 1/decay_halflife); + (i.e. after decay_halflife calls to add() the data given to the first add + will be down weighted by 0.5 in the statistics stored in this object). + !*/ + + T forget_factor ( + ) const; + /*! + ensures + - returns the exponential forget factor used to forget old statistics when + add() is called. + !*/ + + void add ( + const T& x + ); + /*! + ensures + - updates the statistics stored in this object so that x is factored into + them. + - #current_n() == current_n()*forget_factor() + forget_factor() + - Down weights old statistics by a factor of forget_factor(). + !*/ + + T current_n ( + ) const; + /*! + ensures + - returns the effective number of points given to this object. As add() + is called this value will converge to a constant, the value of which is + based on the decay_halflife supplied to the constructor. + !*/ + + T mean ( + ) const; + /*! + ensures + - returns the mean value of all x samples presented to this object + via add(). + !*/ + + T variance ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the sample variance value of all x samples presented to this + object via add(). + !*/ + + T stddev ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the sample standard deviation of all x samples presented to this + object via add(). + !*/ + + }; + + template <typename T> + void serialize ( + const running_stats_decayed<T>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + template <typename T> + void deserialize ( + running_stats_decayed<T>& item, + std::istream& in + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class running_covariance + { + /*! + REQUIREMENTS ON matrix_type + Must be some type of dlib::matrix. + + INITIAL VALUE + - in_vector_size() == 0 + - current_n() == 0 + + WHAT THIS OBJECT REPRESENTS + This object is a simple tool for computing the mean and + covariance of a sequence of vectors. + !*/ + public: + + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + running_covariance( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void clear( + ); + /*! + ensures + - this object has its initial value + - clears all memory of any previous data points + !*/ + + long current_n ( + ) const; + /*! + ensures + - returns the number of samples that have been presented to this object + !*/ + + long in_vector_size ( + ) const; + /*! + ensures + - if (this object has been presented with any input vectors or + set_dimension() has been called) then + - returns the dimension of the column vectors used with this object + - else + - returns 0 + !*/ + + void set_dimension ( + long size + ); + /*! + requires + - size > 0 + ensures + - #in_vector_size() == size + - #current_n() == 0 + !*/ + + template <typename T> + void add ( + const T& val + ); + /*! + requires + - val must represent a column vector. It can either be a dlib::matrix + object or some kind of unsorted sparse vector type. See the top of + dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector. + - val must have a number of dimensions which is compatible with the current + setting of in_vector_size(). In particular, this means that the + following must hold: + - if (val is a dlib::matrix) then + - in_vector_size() == 0 || val.size() == val_vector_size() + - else + - max_index_plus_one(val) <= in_vector_size() + - in_vector_size() > 0 + (i.e. you must call set_dimension() prior to calling add() if + you want to use sparse vectors.) + ensures + - updates the mean and covariance stored in this object so that + the new value is factored into them. + - if (val is a dlib::matrix) then + - #in_vector_size() == val.size() + !*/ + + const column_matrix mean ( + ) const; + /*! + requires + - in_vector_size() != 0 + ensures + - returns the mean of all the vectors presented to this object + so far. + !*/ + + const general_matrix covariance ( + ) const; + /*! + requires + - in_vector_size() != 0 + - current_n() > 1 + ensures + - returns the unbiased sample covariance matrix for all the vectors + presented to this object so far. + !*/ + + const running_covariance operator+ ( + const running_covariance& item + ) const; + /*! + requires + - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size() + (i.e. the in_vector_size() of *this and item must match or one must be zero) + ensures + - returns a new running_covariance object that represents the combination of all + the vectors given to *this and item. That is, this function returns a + running_covariance object, R, that is equivalent to what you would obtain if all + calls to this->add() and item.add() had instead been done to R. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class running_cross_covariance + { + /*! + REQUIREMENTS ON matrix_type + Must be some type of dlib::matrix. + + INITIAL VALUE + - x_vector_size() == 0 + - y_vector_size() == 0 + - current_n() == 0 + + WHAT THIS OBJECT REPRESENTS + This object is a simple tool for computing the mean and cross-covariance + matrices of a sequence of pairs of vectors. + !*/ + + public: + + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef typename matrix_type::layout_type layout_type; + typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; + typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; + + running_cross_covariance( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void clear( + ); + /*! + ensures + - This object has its initial value. + - Clears all memory of any previous data points. + !*/ + + long x_vector_size ( + ) const; + /*! + ensures + - if (this object has been presented with any input vectors or + set_dimensions() has been called) then + - returns the dimension of the x vectors given to this object via add(). + - else + - returns 0 + !*/ + + long y_vector_size ( + ) const; + /*! + ensures + - if (this object has been presented with any input vectors or + set_dimensions() has been called) then + - returns the dimension of the y vectors given to this object via add(). + - else + - returns 0 + !*/ + + void set_dimensions ( + long x_size, + long y_size + ); + /*! + requires + - x_size > 0 + - y_size > 0 + ensures + - #x_vector_size() == x_size + - #y_vector_size() == y_size + - #current_n() == 0 + !*/ + + long current_n ( + ) const; + /*! + ensures + - returns the number of samples that have been presented to this object. + !*/ + + template <typename T, typename U> + void add ( + const T& x, + const U& y + ); + /*! + requires + - x and y must represent column vectors. They can either be dlib::matrix + objects or some kind of unsorted sparse vector type. See the top of + dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector. + - x and y must have a number of dimensions which is compatible with the + current setting of x_vector_size() and y_vector_size(). In particular, + this means that the following must hold: + - if (x or y is a sparse vector type) then + - x_vector_size() > 0 && y_vector_size() > 0 + (i.e. you must call set_dimensions() prior to calling add() if + you want to use sparse vectors.) + - if (x is a dlib::matrix) then + - x_vector_size() == 0 || x.size() == x_vector_size() + - else + - max_index_plus_one(x) <= x_vector_size() + - if (y is a dlib::matrix) then + - y_vector_size() == 0 || y.size() == y_vector_size() + - else + - max_index_plus_one(y) <= y_vector_size() + ensures + - updates the mean and cross-covariance matrices stored in this object so + that the new (x,y) vector pair is factored into them. + - if (x is a dlib::matrix) then + - #x_vector_size() == x.size() + - if (y is a dlib::matrix) then + - #y_vector_size() == y.size() + !*/ + + const column_matrix mean_x ( + ) const; + /*! + requires + - current_n() != 0 + ensures + - returns the mean of all the x vectors presented to this object so far. + - The returned vector will have x_vector_size() dimensions. + !*/ + + const column_matrix mean_y ( + ) const; + /*! + requires + - current_n() != 0 + ensures + - returns the mean of all the y vectors presented to this object so far. + - The returned vector will have y_vector_size() dimensions. + !*/ + + const general_matrix covariance_xy ( + ) const; + /*! + requires + - current_n() > 1 + ensures + - returns the unbiased sample cross-covariance matrix for all the vector + pairs presented to this object so far. In particular, returns a matrix + M such that: + - M.nr() == x_vector_size() + - M.nc() == y_vector_size() + - M == the cross-covariance matrix of the data given to add(). + !*/ + + const running_cross_covariance operator+ ( + const running_cross_covariance& item + ) const; + /*! + requires + - x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size() + (i.e. the x_vector_size() of *this and item must match or one must be zero) + - y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size() + (i.e. the y_vector_size() of *this and item must match or one must be zero) + ensures + - returns a new running_cross_covariance object that represents the + combination of all the vectors given to *this and item. That is, this + function returns a running_cross_covariance object, R, that is equivalent + to what you would obtain if all calls to this->add() and item.add() had + instead been done to R. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class vector_normalizer + { + /*! + REQUIREMENTS ON matrix_type + - must be a dlib::matrix object capable of representing column + vectors + + INITIAL VALUE + - in_vector_size() == 0 + - out_vector_size() == 0 + - means().size() == 0 + - std_devs().size() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents something that can learn to normalize a set + of column vectors. In particular, normalized column vectors should + have zero mean and a variance of one. + + Also, if desired, this object can use principal component + analysis for the purposes of reducing the number of elements in a + vector. + + THREAD SAFETY + Note that this object contains a cached matrix object it uses + to store intermediate results for normalization. This avoids + needing to reallocate it every time this object performs normalization + but also makes it non-thread safe. So make sure you don't share + instances of this object between threads. + !*/ + + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef matrix_type result_type; + + template <typename vector_type> + void train ( + const vector_type& samples + ); + /*! + requires + - samples.size() > 0 + - samples == a column matrix or something convertible to a column + matrix via mat(). Also, x should contain + matrix_type objects that represent nonempty column vectors. + - samples does not contain any infinite or NaN values + ensures + - #in_vector_size() == samples(0).nr() + - #out_vector_size() == samples(0).nr() + - This object has learned how to normalize vectors that look like + vectors in the given set of samples. + - #means() == mean(samples) + - #std_devs() == reciprocal(sqrt(variance(samples))); + !*/ + + long in_vector_size ( + ) const; + /*! + ensures + - returns the number of rows that input vectors are + required to contain if they are to be normalized by + this object. + !*/ + + long out_vector_size ( + ) const; + /*! + ensures + - returns the number of rows in the normalized vectors + that come out of this object. + !*/ + + const matrix_type& means ( + ) const; + /*! + ensures + - returns a matrix M such that: + - M.nc() == 1 + - M.nr() == in_vector_size() + - M(i) == the mean of the ith input feature shown to train() + !*/ + + const matrix_type& std_devs ( + ) const; + /*! + ensures + - returns a matrix SD such that: + - SD.nc() == 1 + - SD.nr() == in_vector_size() + - SD(i) == the reciprocal of the standard deviation of the ith + input feature shown to train() + !*/ + + const result_type& operator() ( + const matrix_type& x + ) const; + /*! + requires + - x.nr() == in_vector_size() + - x.nc() == 1 + ensures + - returns a normalized version of x, call it Z, that has the + following properties: + - Z.nr() == out_vector_size() + - Z.nc() == 1 + - the mean of each element of Z is 0 + - the variance of each element of Z is 1 + - Z == pointwise_multiply(x-means(), std_devs()); + !*/ + + void swap ( + vector_normalizer& item + ); + /*! + ensures + - swaps *this and item + !*/ + }; + + template < + typename matrix_type + > + inline void swap ( + vector_normalizer<matrix_type>& a, + vector_normalizer<matrix_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename matrix_type, + > + void deserialize ( + vector_normalizer<matrix_type>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + + template < + typename matrix_type, + > + void serialize ( + const vector_normalizer<matrix_type>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class vector_normalizer_pca + { + /*! + REQUIREMENTS ON matrix_type + - must be a dlib::matrix object capable of representing column + vectors + + INITIAL VALUE + - in_vector_size() == 0 + - out_vector_size() == 0 + - means().size() == 0 + - std_devs().size() == 0 + - pca_matrix().size() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents something that can learn to normalize a set + of column vectors. In particular, normalized column vectors should + have zero mean and a variance of one. + + Also, this object uses principal component analysis for the purposes + of reducing the number of elements in a vector. + + THREAD SAFETY + Note that this object contains a cached matrix object it uses + to store intermediate results for normalization. This avoids + needing to reallocate it every time this object performs normalization + but also makes it non-thread safe. So make sure you don't share + instances of this object between threads. + !*/ + + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef matrix<scalar_type,0,1,mem_manager_type> result_type; + + template <typename vector_type> + void train ( + const vector_type& samples, + const double eps = 0.99 + ); + /*! + requires + - 0 < eps <= 1 + - samples.size() > 0 + - samples == a column matrix or something convertible to a column + matrix via mat(). Also, x should contain + matrix_type objects that represent nonempty column vectors. + - samples does not contain any infinite or NaN values + ensures + - This object has learned how to normalize vectors that look like + vectors in the given set of samples. + - Principal component analysis is performed to find a transform + that might reduce the number of output features. + - #in_vector_size() == samples(0).nr() + - 0 < #out_vector_size() <= samples(0).nr() + - eps is a number that controls how "lossy" the pca transform will be. + Large values of eps result in #out_vector_size() being larger and + smaller values of eps result in #out_vector_size() being smaller. + - #means() == mean(samples) + - #std_devs() == reciprocal(sqrt(variance(samples))); + - #pca_matrix() == the PCA transform matrix that is out_vector_size() + rows by in_vector_size() columns. + !*/ + + long in_vector_size ( + ) const; + /*! + ensures + - returns the number of rows that input vectors are + required to contain if they are to be normalized by + this object. + !*/ + + long out_vector_size ( + ) const; + /*! + ensures + - returns the number of rows in the normalized vectors + that come out of this object. + !*/ + + const matrix<scalar_type,0,1,mem_manager_type>& means ( + ) const; + /*! + ensures + - returns a matrix M such that: + - M.nc() == 1 + - M.nr() == in_vector_size() + - M(i) == the mean of the ith input feature shown to train() + !*/ + + const matrix<scalar_type,0,1,mem_manager_type>& std_devs ( + ) const; + /*! + ensures + - returns a matrix SD such that: + - SD.nc() == 1 + - SD.nr() == in_vector_size() + - SD(i) == the reciprocal of the standard deviation of the ith + input feature shown to train() + !*/ + + const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix ( + ) const; + /*! + ensures + - returns a matrix PCA such that: + - PCA.nr() == out_vector_size() + - PCA.nc() == in_vector_size() + - PCA == the principal component analysis transformation + matrix + !*/ + + const result_type& operator() ( + const matrix_type& x + ) const; + /*! + requires + - x.nr() == in_vector_size() + - x.nc() == 1 + ensures + - returns a normalized version of x, call it Z, that has the + following properties: + - Z.nr() == out_vector_size() + - Z.nc() == 1 + - the mean of each element of Z is 0 + - the variance of each element of Z is 1 + - Z == pca_matrix()*pointwise_multiply(x-means(), std_devs()); + !*/ + + void swap ( + vector_normalizer_pca& item + ); + /*! + ensures + - swaps *this and item + !*/ + }; + + template < + typename matrix_type + > + inline void swap ( + vector_normalizer_pca<matrix_type>& a, + vector_normalizer_pca<matrix_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename matrix_type, + > + void deserialize ( + vector_normalizer_pca<matrix_type>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + + template < + typename matrix_type, + > + void serialize ( + const vector_normalizer_pca<matrix_type>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STATISTICs_ABSTRACT_ + diff --git a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h new file mode 100644 index 000000000..690370f80 --- /dev/null +++ b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h @@ -0,0 +1,618 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_ +#define DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_ + +#include "vector_normalizer_frobmetric_abstract.h" +#include "../matrix.h" +#include "../optimization.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + struct frobmetric_training_sample + { + matrix_type anchor_vect; + std::vector<matrix_type> near_vects; + std::vector<matrix_type> far_vects; + + unsigned long num_triples ( + ) const { return near_vects.size() * far_vects.size(); } + + void clear() + { + near_vects.clear(); + far_vects.clear(); + } + }; + + template < + typename matrix_type + > + void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out) + { + int version = 1; + serialize(version, out); + serialize(item.anchor_vect, out); + serialize(item.near_vects, out); + serialize(item.far_vects, out); + } + + template < + typename matrix_type + > + void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw serialization_error("Unexpected version found while deserializing dlib::frobmetric_training_sample."); + deserialize(item.anchor_vect, in); + deserialize(item.near_vects, in); + deserialize(item.far_vects, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class vector_normalizer_frobmetric + { + + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef matrix_type result_type; + + private: + struct compact_frobmetric_training_sample + { + std::vector<matrix_type> near_vects; + std::vector<matrix_type> far_vects; + }; + + struct objective + { + objective ( + const std::vector<compact_frobmetric_training_sample>& samples_, + matrix<double,0,0,mem_manager_type>& Aminus_, + const matrix<double,0,1,mem_manager_type>& bias_ + ) : samples(samples_), Aminus(Aminus_), bias(bias_) {} + + double operator()(const matrix<double,0,1,mem_manager_type>& u) const + { + long idx = 0; + const long dims = samples[0].far_vects[0].size(); + // Here we compute \hat A from the paper, which we refer to as just A in + // the code. + matrix<double,0,0,mem_manager_type> A(dims,dims); + A = 0; + std::vector<double> ufar, unear; + for (unsigned long i = 0; i < samples.size(); ++i) + { + ufar.assign(samples[i].far_vects.size(),0); + unear.assign(samples[i].near_vects.size(),0); + for (unsigned long j = 0; j < unear.size(); ++j) + { + for (unsigned long k = 0; k < ufar.size(); ++k) + { + const double val = u(idx++); + ufar[k] -= val; + unear[j] += val; + } + } + for (unsigned long j = 0; j < unear.size(); ++j) + A += unear[j]*samples[i].near_vects[j]*trans(samples[i].near_vects[j]); + for (unsigned long j = 0; j < ufar.size(); ++j) + A += ufar[j]*samples[i].far_vects[j]*trans(samples[i].far_vects[j]); + } + + eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(A)); + Aminus = ed.get_pseudo_v()*diagm(upperbound(ed.get_real_eigenvalues(),0))*trans(ed.get_pseudo_v()); + // Do this to avoid numeric instability later on since the above + // computation can make Aminus slightly non-symmetric. + Aminus = make_symmetric(Aminus); + + return dot(u,bias) - 0.5*sum(squared(Aminus)); + } + + private: + const std::vector<compact_frobmetric_training_sample>& samples; + matrix<double,0,0,mem_manager_type>& Aminus; + const matrix<double,0,1,mem_manager_type>& bias; + }; + + struct derivative + { + derivative ( + unsigned long num_triples_, + const std::vector<compact_frobmetric_training_sample>& samples_, + matrix<double,0,0,mem_manager_type>& Aminus_, + const matrix<double,0,1,mem_manager_type>& bias_ + ) : num_triples(num_triples_), samples(samples_), Aminus(Aminus_), bias(bias_) {} + + matrix<double,0,1,mem_manager_type> operator()(const matrix<double,0,1,mem_manager_type>& ) const + { + // Note that Aminus is a function of u (the argument to this function), but + // since Aminus will have been computed already by the most recent call to + // the objective function we don't need to do anything with u. We can just + // use Aminus right away. + matrix<double,0,1,mem_manager_type> grad(num_triples); + + long idx = 0; + std::vector<double> ufar, unear; + for (unsigned long i = 0; i < samples.size(); ++i) + { + ufar.resize(samples[i].far_vects.size()); + unear.resize(samples[i].near_vects.size()); + + for (unsigned long j = 0; j < unear.size(); ++j) + unear[j] = sum(pointwise_multiply(Aminus, samples[i].near_vects[j]*trans(samples[i].near_vects[j]))); + for (unsigned long j = 0; j < ufar.size(); ++j) + ufar[j] = sum(pointwise_multiply(Aminus, samples[i].far_vects[j]*trans(samples[i].far_vects[j]))); + + for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) + { + for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k) + { + grad(idx) = bias(idx) + ufar[k]-unear[j]; + idx++; + } + } + } + + return grad; + } + + private: + const unsigned long num_triples; + const std::vector<compact_frobmetric_training_sample>& samples; + matrix<double,0,0,mem_manager_type>& Aminus; + const matrix<double,0,1,mem_manager_type>& bias; + }; + + + class custom_stop_strategy + { + public: + custom_stop_strategy( + double C_, + double eps_, + bool be_verbose_, + unsigned long max_iter_ + ) + { + _c = C_; + + _cur_iter = 0; + _gradient_thresh = eps_; + _max_iter = max_iter_; + _verbose = be_verbose_; + } + + template <typename T> + bool should_continue_search ( + const T& u, + const double , + const T& grad + ) + { + ++_cur_iter; + + double max_gradient = 0; + for (long i = 0; i < grad.size(); ++i) + { + const bool at_lower_bound = (0 >= u(i) && grad(i) > 0); + const bool at_upper_bound = (_c/grad.size() <= u(i) && grad(i) < 0); + if (!at_lower_bound && !at_upper_bound) + max_gradient = std::max(std::abs(grad(i)), max_gradient); + } + + if (_verbose) + { + std::cout << "iteration: " << _cur_iter << " max_gradient: "<< max_gradient << std::endl; + } + + // Only stop when the largest non-bound-constrained element of the gradient + // is lower than the threshold. + if (max_gradient < _gradient_thresh) + return false; + + // Check if we have hit the max allowable number of iterations. + if (_cur_iter > _max_iter) + { + return false; + } + + return true; + } + + private: + bool _verbose; + + unsigned long _max_iter; + unsigned long _cur_iter; + double _c; + double _gradient_thresh; + }; + + public: + vector_normalizer_frobmetric ( + ) + { + verbose = false; + eps = 0.1; + C = 1; + max_iter = 5000; + _use_identity_matrix_prior = false; + } + + bool uses_identity_matrix_prior ( + ) const + { + return _use_identity_matrix_prior; + } + + void set_uses_identity_matrix_prior ( + bool use_prior + ) + { + _use_identity_matrix_prior = use_prior; + } + + void be_verbose( + ) + { + verbose = true; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void vector_normalizer_frobmetric::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps: " << eps_ + ); + eps = eps_; + } + + double get_epsilon ( + ) const + { + return eps; + } + + void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void vector_normalizer_frobmetric::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + void set_max_iterations ( + unsigned long max_iterations + ) + { + max_iter = max_iterations; + } + + unsigned long get_max_iterations ( + ) const + { + return max_iter; + } + + double get_c ( + ) const + { + return C; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void train ( + const std::vector<frobmetric_training_sample<matrix_type> >& samples + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(samples.size() > 0, + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t you have to give a nonempty set of samples to this function" + ); +#ifdef ENABLE_ASSERTS + { + const long dims = samples[0].anchor_vect.size(); + DLIB_ASSERT(dims != 0, + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t The dimension of the input vectors can't be zero." + ); + for (unsigned long i = 0; i < samples.size(); ++i) + { + DLIB_ASSERT(is_col_vector(samples[i].anchor_vect), + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t i: " << i + ); + DLIB_ASSERT(samples[i].anchor_vect.size() == dims, + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t i: " << i + << "\n\t dims: " << dims + << "\n\t samples[i].anchor_vect.size(): " << samples[i].anchor_vect.size() + ); + + DLIB_ASSERT(samples[i].num_triples() != 0, + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t It is illegal for a training sample to have no data in it" + << "\n\t i: " << i + ); + for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) + { + DLIB_ASSERT(is_col_vector(samples[i].near_vects[j]), + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t i: " << i + << "\n\t j: " << j + ); + DLIB_ASSERT(samples[i].near_vects[j].size() == dims, + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t i: " << i + << "\n\t j: " << j + << "\n\t dims: " << dims + << "\n\t samples[i].near_vects[j].size(): " << samples[i].near_vects[j].size() + ); + } + for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j) + { + DLIB_ASSERT(is_col_vector(samples[i].far_vects[j]), + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t i: " << i + << "\n\t j: " << j + ); + DLIB_ASSERT(samples[i].far_vects[j].size() == dims, + "\tvoid vector_normalizer_frobmetric::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t i: " << i + << "\n\t j: " << j + << "\n\t dims: " << dims + << "\n\t samples[i].far_vects[j].size(): " << samples[i].far_vects[j].size() + ); + } + } + } +#endif // end ENABLE_ASSERTS + + + // compute the mean sample + m = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + m += samples[i].anchor_vect; + m /= samples.size(); + + DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_frobmetric::train() have infinite or NaN values"); + + // Now we need to find tform. So we setup the optimization problem and run it + // over the next few lines of code. + unsigned long num_triples = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + num_triples += samples[i].near_vects.size()*samples[i].far_vects.size(); + + matrix<double,0,1,mem_manager_type> u(num_triples); + matrix<double,0,1,mem_manager_type> bias(num_triples); + u = 0; + bias = 1; + + + // precompute all the anchor_vect to far_vects/near_vects pairs + std::vector<compact_frobmetric_training_sample> data(samples.size()); + unsigned long cnt = 0; + std::vector<double> far_norm, near_norm; + for (unsigned long i = 0; i < data.size(); ++i) + { + far_norm.clear(); + near_norm.clear(); + data[i].far_vects.reserve(samples[i].far_vects.size()); + data[i].near_vects.reserve(samples[i].near_vects.size()); + for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j) + { + data[i].far_vects.push_back(samples[i].anchor_vect - samples[i].far_vects[j]); + if (_use_identity_matrix_prior) + far_norm.push_back(length_squared(data[i].far_vects.back())); + } + for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) + { + data[i].near_vects.push_back(samples[i].anchor_vect - samples[i].near_vects[j]); + if (_use_identity_matrix_prior) + near_norm.push_back(length_squared(data[i].near_vects.back())); + } + + // Note that this loop only executes if _use_identity_matrix_prior == true. + for (unsigned long j = 0; j < near_norm.size(); ++j) + { + for (unsigned long k = 0; k < far_norm.size(); ++k) + { + bias(cnt++) = 1 - (far_norm[k] - near_norm[j]); + } + } + } + + // Now run the main part of the algorithm + matrix<double,0,0,mem_manager_type> Aminus; + find_max_box_constrained(lbfgs_search_strategy(10), + custom_stop_strategy(C, eps, verbose, max_iter), + objective(data, Aminus, bias), + derivative(num_triples, data, Aminus, bias), + u, 0, C/num_triples); + + + // What we need is the optimal Aminus which is a function of u. So we already + // have what we need and just need to put it into tform. + eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(-Aminus)); + matrix<double,0,1,mem_manager_type> eigs = ed.get_real_eigenvalues(); + // But first, discard the components that are zero to within the machine epsilon. + const double tol = max(eigs)*std::numeric_limits<double>::epsilon(); + for (long i = 0; i < eigs.size(); ++i) + { + if (eigs(i) < tol) + eigs(i) = 0; + } + if (_use_identity_matrix_prior) + tform = matrix_cast<scalar_type>(identity_matrix(Aminus) + diagm(sqrt(eigs))*trans(ed.get_pseudo_v())); + else + tform = matrix_cast<scalar_type>(diagm(sqrt(eigs))*trans(ed.get_pseudo_v())); + + // Pre-apply the transform to m so we don't have to do it inside operator() + // every time it's called. + m = tform*m; + } + + long in_vector_size ( + ) const + { + return m.nr(); + } + + long out_vector_size ( + ) const + { + return m.nr(); + } + + const matrix<scalar_type,0,1,mem_manager_type>& transformed_means ( + ) const + { + return m; + } + + const matrix<scalar_type,0,0,mem_manager_type>& transform ( + ) const + { + return tform; + } + + const result_type& operator() ( + const matrix_type& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(in_vector_size() != 0 && in_vector_size() == x.size() && + is_col_vector(x) == true, + "\tmatrix vector_normalizer_frobmetric::operator()" + << "\n\t you have given invalid arguments to this function" + << "\n\t in_vector_size(): " << in_vector_size() + << "\n\t x.size(): " << x.size() + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t this: " << this + ); + + temp_out = tform*x-m; + return temp_out; + } + + template <typename mt> + friend void deserialize ( + vector_normalizer_frobmetric<mt>& item, + std::istream& in + ); + + template <typename mt> + friend void serialize ( + const vector_normalizer_frobmetric<mt>& item, + std::ostream& out + ); + + private: + + // ------------------- private data members ------------------- + + matrix_type m; + matrix<scalar_type,0,0,mem_manager_type> tform; + bool verbose; + double eps; + double C; + unsigned long max_iter; + bool _use_identity_matrix_prior; + + // This is just a temporary variable that doesn't contribute to the + // state of this object. + mutable matrix_type temp_out; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void serialize ( + const vector_normalizer_frobmetric<matrix_type>& item, + std::ostream& out + ) + { + const int version = 2; + serialize(version, out); + + serialize(item.m, out); + serialize(item.tform, out); + serialize(item.verbose, out); + serialize(item.eps, out); + serialize(item.C, out); + serialize(item.max_iter, out); + serialize(item._use_identity_matrix_prior, out); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void deserialize ( + vector_normalizer_frobmetric<matrix_type>& item, + std::istream& in + ) + { + int version = 0; + deserialize(version, in); + if (version != 1 && version != 2) + throw serialization_error("Unsupported version found while deserializing dlib::vector_normalizer_frobmetric."); + + deserialize(item.m, in); + deserialize(item.tform, in); + deserialize(item.verbose, in); + deserialize(item.eps, in); + deserialize(item.C, in); + deserialize(item.max_iter, in); + if (version == 2) + deserialize(item._use_identity_matrix_prior, in); + else + item._use_identity_matrix_prior = false; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_ + diff --git a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h new file mode 100644 index 000000000..302628330 --- /dev/null +++ b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h @@ -0,0 +1,328 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_ +#ifdef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_ + +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + struct frobmetric_training_sample + { + /*! + WHAT THIS OBJECT REPRESENTS + This object represents a training data sample for the + vector_normalizer_frobmetric object. It defines a set of training triplets + relative to a single anchor_vect vector. That is, it specifies that the + learned distance metric should satisfy num_triples() constraints which are, + for all valid i and j: + length(T*anchor_vect-T*near_vects[i]) + 1 < length(T*anchor_vect - T*far_vects[j]) + for some appropriate linear transformation T which will be learned by + vector_normalizer_frobmetric. + !*/ + + matrix_type anchor_vect; + std::vector<matrix_type> near_vects; + std::vector<matrix_type> far_vects; + + unsigned long num_triples ( + ) const { return near_vects.size() * far_vects.size(); } + /*! + ensures + - returns the number of training triplets defined by this object. + !*/ + + void clear() + /*! + ensures + - #near_vects.size() == 0 + - #far_vects.size() == 0 + !*/ + }; + + template < typename matrix_type > + void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out) + template < typename matrix_type > + void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in) + /*! + provides serialisation support. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + class vector_normalizer_frobmetric + { + /*! + REQUIREMENTS ON matrix_type + - must be a dlib::matrix object capable of representing column + vectors + + INITIAL VALUE + - in_vector_size() == 0 + - out_vector_size() == 0 + - get_epsilon() == 0.1 + - get_c() == 1 + - get_max_iterations() == 5000 + - This object is not verbose + - uses_identity_matrix_prior() == false + + WHAT THIS OBJECT REPRESENTS + This object is a tool for performing the FrobMetric distance metric + learning algorithm described in the following paper: + A Scalable Dual Approach to Semidefinite Metric Learning + By Chunhua Shen, Junae Kim, Lei Wang, in CVPR 2011 + + Therefore, this object is a tool that takes as input training triplets + (anchor_vect, near, far) of vectors and attempts to learn a linear + transformation T such that: + length(T*anchor_vect-T*near) + 1 < length(T*anchor_vect - T*far) + That is, you give a bunch of anchor_vect vectors and for each anchor_vect + you specify some vectors which should be near to it and some that should be + far form it. This object then tries to find a transformation matrix that + makes the "near" vectors close to their anchors while the "far" vectors are + farther away. + + THREAD SAFETY + Note that this object contains a cached matrix object it uses + to store intermediate results for normalization. This avoids + needing to reallocate it every time this object performs normalization + but also makes it non-thread safe. So make sure you don't share + instances of this object between threads. + !*/ + + public: + typedef typename matrix_type::mem_manager_type mem_manager_type; + typedef typename matrix_type::type scalar_type; + typedef matrix_type result_type; + + vector_normalizer_frobmetric ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + bool uses_identity_matrix_prior ( + ) const; + /*! + ensures + - Normally this object will try and find a matrix transform() that + minimizes sum(squared(transform())) but also fits the training data. + However, if #uses_identity_matrix_prior() == true then it will instead + try to find the transformation matrix that minimizes + sum(squared(identity_matrix()-transform())). That is, it will try to + find the matrix most similar to the identity matrix that best fits the + training data. + !*/ + + void set_uses_identity_matrix_prior ( + bool use_prior + ); + /*! + ensures + - #uses_identity_matrix_prior() == use_prior + !*/ + + void be_verbose( + ); + /*! + ensures + - This object will print status messages to standard out so the user can + observe the progress of the train() routine. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out. + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer to + execute. + !*/ + + void set_c ( + double C + ); + /*! + requires + - C > 0 + ensures + - #set_c() == C + !*/ + + double get_c ( + ) const; + /*! + ensures + - returns the regularization parameter. It is the parameter that + determines the trade-off between trying to fit the training data exactly + or allowing more errors but hopefully improving the generalization of the + resulting distance metric. Larger values encourage exact fitting while + smaller values of C may encourage better generalization. + !*/ + + void set_max_iterations ( + unsigned long max_iterations + ); + /*! + ensures + - #get_max_iterations() == max_iterations + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - The train() routine uses an iterative numerical solver to find the best + distance metric. This function returns the maximum allowable number of + iterations it will use before terminating. Note that typically the + solver terminates prior to the max iteration count limit due to the error + dropping below get_epsilon(). + !*/ + + void train ( + const std::vector<frobmetric_training_sample<matrix_type> >& samples + ); + /*! + requires + - samples.size() != 0 + - All matrices inside samples (i.e. anchors and elements of near_vects and far_vects) + are column vectors with the same non-zero dimension. + - All the vectors in samples contain finite values. + - All elements of samples contain data, specifically, for all valid i: + - samples[i].num_triples() != 0 + ensures + - learns a distance metric from the given training samples. After train + finishes you can use this object's operator() to transform vectors + according to the learned distance metric. In particular, we will have: + - #transform() == The linear transformation learned by the FrobMetric + learning procedure. + - #in_vector_size() == samples[0].anchor_vect.size() + - You can call (*this)(x) to transform a vector according to the learned + distance metric. That is, it should generally be the case that: + - length((*this)(anchor_vect) - (*this)(near)) + 1 < length((*this)(anchor_vect) - (*this)(far)) + for the anchor_vect, near, and far vectors in the training data. + - #transformed_means() == the mean of the input anchor_vect vectors + after being transformed by #transform() + !*/ + + long in_vector_size ( + ) const; + /*! + ensures + - returns the number of rows that input vectors are required to contain if + they are to be normalized by this object. + !*/ + + long out_vector_size ( + ) const; + /*! + ensures + - returns the number of rows in the normalized vectors that come out of + this object. + - The value returned is always in_vector_size(). So out_vector_size() is + just provided to maintain interface consistency with other vector + normalizer objects. That is, the transformations applied by this object + do not change the dimensionality of the vectors. + !*/ + + const matrix<scalar_type,0,1,mem_manager_type>& transformed_means ( + ) const; + /*! + ensures + - returns a column vector V such that: + - V.size() == in_vector_size() + - V is a vector such that subtracting it from transformed vectors + results in them having an expected value of 0. Therefore, it is + equal to transform() times the mean of the input anchor_vect vectors + given to train(). + !*/ + + const matrix<scalar_type,0,0,mem_manager_type>& transform ( + ) const; + /*! + ensures + - returns a copy of the transformation matrix we learned during the last + call to train(). + - The returned matrix is square and has in_vector_size() by in_vector_size() + dimensions. + !*/ + + const result_type& operator() ( + const matrix_type& x + ) const; + /*! + requires + - in_vector_size() != 0 + - in_vector_size() == x.size() + - is_col_vector(x) == true + ensures + - returns a normalized version of x, call it Z, that has the following + properties: + - Z == The result of applying the linear transform we learned during + train() to the input vector x. + - Z == transform()*x-transformed_means() + - is_col_vector(Z) == true + - Z.size() == x.size() + - The expected value of each element of Z is 0. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void serialize ( + const vector_normalizer_frobmetric<matrix_type>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type + > + void deserialize ( + vector_normalizer_frobmetric<matrix_type>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_ + |