diff options
Diffstat (limited to 'ml/dlib/dlib/statistics')
20 files changed, 0 insertions, 7913 deletions
diff --git a/ml/dlib/dlib/statistics/average_precision.h b/ml/dlib/dlib/statistics/average_precision.h deleted file mode 100644 index 6c2e7e0b1..000000000 --- a/ml/dlib/dlib/statistics/average_precision.h +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_AVERAGE_PREcISION_Hh_ -#define DLIB_AVERAGE_PREcISION_Hh_ - -#include "average_precision_abstract.h" -#include <vector> - - -namespace dlib -{ - namespace impl - { - inline bool get_bool_part ( - const bool& b - ) { return b; } - - template <typename T> - bool get_bool_part(const std::pair<T,bool>& item) { return item.second; } - } - -// ---------------------------------------------------------------------------------------- - - template <typename T, typename alloc> - double average_precision ( - const std::vector<T,alloc>& items, - unsigned long missing_relevant_items = 0 - ) - { - using namespace dlib::impl; - double relevant_count = 0; - // find the precision values - std::vector<double> precision; - for (unsigned long i = 0; i < items.size(); ++i) - { - if (get_bool_part(items[i])) - { - ++relevant_count; - precision.push_back(relevant_count / (i+1)); - } - } - - double precision_sum = 0; - double max_val = 0; - // now sum over the interpolated precision values - for (std::vector<double>::reverse_iterator i = precision.rbegin(); i != precision.rend(); ++i) - { - max_val = std::max(max_val, *i); - precision_sum += max_val; - } - - - relevant_count += missing_relevant_items; - - if (relevant_count != 0) - return precision_sum/relevant_count; - else - return 1; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_AVERAGE_PREcISION_Hh_ - diff --git a/ml/dlib/dlib/statistics/average_precision_abstract.h b/ml/dlib/dlib/statistics/average_precision_abstract.h deleted file mode 100644 index 76c2c702a..000000000 --- a/ml/dlib/dlib/statistics/average_precision_abstract.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_ -#ifdef DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_ - -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename alloc - > - double average_precision ( - const std::vector<bool,alloc>& items, - unsigned long missing_relevant_items = 0 - ); - /*! - ensures - - Interprets items as a list of relevant and non-relevant items in a response - from an information retrieval system. In particular, items with a true value - are relevant and false items are non-relevant. This function then returns - the average precision of the ranking of the given items. For, example, the - ranking [true, true, true, true, false] would have an average precision of 1. - On the other hand, the ranking of [true false false true] would have an - average precision of 0.75 (because the first true has a precision of 1 and - the second true has a precision of 0.5, giving an average of 0.75). - - As a special case, if item contains no true elements then the average - precision is considered to be 1. - - Note that we use the interpolated precision. That is, the interpolated - precision at a recall value r is set to the maximum precision obtained at any - higher recall value. Or in other words, we interpolate the precision/recall - curve so that precision is monotonically decreasing. Therefore, the average - precision value returned by this function is the area under this interpolated - precision/recall curve. - - This function will add in missing_relevant_items number of items with a - precision of zero into the average value returned. For example, the average - precision of the ranking [true, true] if there are 2 missing relevant items - is 0.5. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double average_precision ( - const std::vector<std::pair<T,bool>,alloc>& items, - unsigned long missing_relevant_items = 0 - ); - /*! - ensures - - this function is equivalent to copying the bool values from items into a - std::vector<bool> and then calling the above average_precision() routine on - it and returning the result. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_AVERAGE_PREcISION_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/cca.h b/ml/dlib/dlib/statistics/cca.h deleted file mode 100644 index 21300ea8f..000000000 --- a/ml/dlib/dlib/statistics/cca.h +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_CCA_hh_ -#define DLIB_CCA_hh_ - -#include "cca_abstract.h" -#include "../algs.h" -#include "../matrix.h" -#include "../sparse_vector.h" -#include "random_subset_selector.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<typename T::type,0,1> compute_correlations ( - const matrix_exp<T>& L, - const matrix_exp<T>& R - ) - { - DLIB_ASSERT( L.size() > 0 && R.size() > 0 && L.nr() == R.nr(), - "\t matrix compute_correlations()" - << "\n\t Invalid inputs were given to this function." - << "\n\t L.size(): " << L.size() - << "\n\t R.size(): " << R.size() - << "\n\t L.nr(): " << L.nr() - << "\n\t R.nr(): " << R.nr() - ); - - typedef typename T::type type; - matrix<type> A, B, C; - A = diag(trans(R)*L); - B = sqrt(diag(trans(L)*L)); - C = sqrt(diag(trans(R)*R)); - A = pointwise_multiply(A , reciprocal(pointwise_multiply(B,C))); - return A; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type, - typename T - > - matrix<T,0,1> impl_cca ( - const matrix_type& L, - const matrix_type& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank, - unsigned long q, - unsigned long num_output_correlations, - double regularization - ) - { - matrix<T> Ul, Vl; - matrix<T> Ur, Vr; - matrix<T> U, V; - matrix<T,0,1> Dr, Dl, D; - - - // Note that we add a few more singular vectors in because it helps improve the - // final results if we run this part with a little higher rank than the final SVD. - svd_fast(L, Ul, Dl, Vl, num_correlations+extra_rank, q); - svd_fast(R, Ur, Dr, Vr, num_correlations+extra_rank, q); - - // Zero out singular values that are essentially zero so they don't cause numerical - // difficulties in the code below. - const double eps = std::numeric_limits<T>::epsilon()*std::max(max(Dr),max(Dl))*100; - Dl = round_zeros(Dl+regularization,eps); - Dr = round_zeros(Dr+regularization,eps); - - // This matrix is really small so we can do a normal full SVD on it. Note that we - // also throw away the columns of Ul and Ur corresponding to zero singular values. - svd3(diagm(Dl>0)*tmp(trans(Ul)*Ur)*diagm(Dr>0), U, D, V); - - // now throw away extra columns of the transformations. We do this in a way - // that keeps the directions that have the highest correlations. - matrix<T,0,1> temp = D; - rsort_columns(U, temp); - rsort_columns(V, D); - U = colm(U, range(0, num_output_correlations-1)); - V = colm(V, range(0, num_output_correlations-1)); - D = rowm(D, range(0, num_output_correlations-1)); - - Ltrans = Vl*inv(diagm(Dl))*U; - Rtrans = Vr*inv(diagm(Dr))*V; - - // Note that the D matrix contains the correlation values for the transformed - // vectors. However, when the L and R matrices have rank higher than - // num_correlations+extra_rank then the values in D become only approximate. - return D; - } - -// ---------------------------------------------------------------------------------------- - - template <typename T> - matrix<T,0,1> cca ( - const matrix<T>& L, - const matrix<T>& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank = 5, - unsigned long q = 2, - double regularization = 0 - ) - { - DLIB_ASSERT( num_correlations > 0 && L.size() > 0 && R.size() > 0 && L.nr() == R.nr() && - regularization >= 0, - "\t matrix cca()" - << "\n\t Invalid inputs were given to this function." - << "\n\t num_correlations: " << num_correlations - << "\n\t regularization: " << regularization - << "\n\t L.size(): " << L.size() - << "\n\t R.size(): " << R.size() - << "\n\t L.nr(): " << L.nr() - << "\n\t R.nr(): " << R.nr() - ); - - using std::min; - const unsigned long n = min(num_correlations, (unsigned long)min(R.nr(),min(L.nc(), R.nc()))); - return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, n, regularization); - } - -// ---------------------------------------------------------------------------------------- - - template <typename sparse_vector_type, typename T> - matrix<T,0,1> cca ( - const std::vector<sparse_vector_type>& L, - const std::vector<sparse_vector_type>& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank = 5, - unsigned long q = 2, - double regularization = 0 - ) - { - DLIB_ASSERT( num_correlations > 0 && L.size() == R.size() && - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 && - regularization >= 0, - "\t matrix cca()" - << "\n\t Invalid inputs were given to this function." - << "\n\t num_correlations: " << num_correlations - << "\n\t regularization: " << regularization - << "\n\t L.size(): " << L.size() - << "\n\t R.size(): " << R.size() - << "\n\t max_index_plus_one(L): " << max_index_plus_one(L) - << "\n\t max_index_plus_one(R): " << max_index_plus_one(R) - ); - - using std::min; - const unsigned long n = min(max_index_plus_one(L), max_index_plus_one(R)); - const unsigned long num_output_correlations = min(num_correlations, std::min<unsigned long>(R.size(),n)); - return impl_cca(L,R,Ltrans, Rtrans, num_correlations, extra_rank, q, num_output_correlations, regularization); - } - -// ---------------------------------------------------------------------------------------- - - template <typename sparse_vector_type, typename Rand_type, typename T> - matrix<T,0,1> cca ( - const random_subset_selector<sparse_vector_type,Rand_type>& L, - const random_subset_selector<sparse_vector_type,Rand_type>& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank = 5, - unsigned long q = 2 - ) - { - return cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CCA_hh_ - - diff --git a/ml/dlib/dlib/statistics/cca_abstract.h b/ml/dlib/dlib/statistics/cca_abstract.h deleted file mode 100644 index 8e0b4e742..000000000 --- a/ml/dlib/dlib/statistics/cca_abstract.h +++ /dev/null @@ -1,191 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_CCA_AbSTRACT_Hh_ -#ifdef DLIB_CCA_AbSTRACT_Hh_ - -#include "../matrix/matrix_la_abstract.h" -#include "random_subset_selector_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<typename T::type,0,1> compute_correlations ( - const matrix_exp<T>& L, - const matrix_exp<T>& R - ); - /*! - requires - - L.size() > 0 - - R.size() > 0 - - L.nr() == R.nr() - ensures - - This function treats L and R as sequences of paired row vectors. It - then computes the correlation values between the elements of these - row vectors. In particular, we return a vector COR such that: - - COR.size() == L.nc() - - for all valid i: - - COR(i) == the correlation coefficient between the following sequence - of paired numbers: (L(k,i), R(k,i)) for k: 0 <= k < L.nr(). - Therefore, COR(i) is a value between -1 and 1 inclusive where 1 - indicates perfect correlation and -1 perfect anti-correlation. Note - that this function assumes the input data vectors have been centered - (i.e. made to have zero mean). If this is not the case then it will - report inaccurate results. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - matrix<T,0,1> cca ( - const matrix<T>& L, - const matrix<T>& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank = 5, - unsigned long q = 2, - double regularization = 0 - ); - /*! - requires - - num_correlations > 0 - - L.size() > 0 - - R.size() > 0 - - L.nr() == R.nr() - - regularization >= 0 - ensures - - This function performs a canonical correlation analysis between the row - vectors in L and R. That is, it finds two transformation matrices, Ltrans - and Rtrans, such that row vectors in the transformed matrices L*Ltrans and - R*Rtrans are as correlated as possible. That is, we try to find two transforms - such that the correlation values returned by compute_correlations(L*Ltrans, R*Rtrans) - would be maximized. - - Let N == min(num_correlations, min(R.nr(),min(L.nc(),R.nc()))) - (This is the actual number of elements in the transformed vectors. - Therefore, note that you can't get more outputs than there are rows or - columns in the input matrices.) - - #Ltrans.nr() == L.nc() - - #Ltrans.nc() == N - - #Rtrans.nr() == R.nc() - - #Rtrans.nc() == N - - This function assumes the data vectors in L and R have already been centered - (i.e. we assume the vectors have zero means). However, in many cases it is - fine to use uncentered data with cca(). But if it is important for your - problem then you should center your data before passing it to cca(). - - This function works with reduced rank approximations of the L and R matrices. - This makes it fast when working with large matrices. In particular, we use - the svd_fast() routine to find reduced rank representations of the input - matrices by calling it as follows: svd_fast(L, U,D,V, num_correlations+extra_rank, q) - and similarly for R. This means that you can use the extra_rank and q - arguments to cca() to influence the accuracy of the reduced rank - approximation. However, the default values should work fine for most - problems. - - returns an estimate of compute_correlations(L*#Ltrans, R*#Rtrans). The - returned vector should exactly match the output of compute_correlations() - when the reduced rank approximation to L and R is accurate and regularization - is set to 0. However, if this is not the case then the return value of this - function will deviate from compute_correlations(L*#Ltrans, R*#Rtrans). This - deviation can be used to check if the reduced rank approximation is working - or you need to increase extra_rank. - - The dimensions of the output vectors produced by L*#Ltrans or R*#Rtrans are - ordered such that the dimensions with the highest correlations come first. - That is, after applying the transforms produced by cca() to a set of vectors - you will find that dimension 0 has the highest correlation, then dimension 1 - has the next highest, and so on. This also means that the list of numbers - returned from cca() will always be listed in decreasing order. - - This function performs the ridge regression version of Canonical Correlation - Analysis when regularization is set to a value > 0. In particular, larger - values indicate the solution should be more heavily regularized. This can be - useful when the dimensionality of the data is larger than the number of - samples. - - A good discussion of CCA can be found in the paper "Canonical Correlation - Analysis" by David Weenink. In particular, this function is implemented - using equations 29 and 30 from his paper. We also use the idea of doing CCA - on a reduced rank approximation of L and R as suggested by Paramveer S. - Dhillon in his paper "Two Step CCA: A new spectral method for estimating - vector models of words". - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sparse_vector_type, - typename T - > - matrix<T,0,1> cca ( - const std::vector<sparse_vector_type>& L, - const std::vector<sparse_vector_type>& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank = 5, - unsigned long q = 2, - double regularization = 0 - ); - /*! - requires - - num_correlations > 0 - - L.size() == R.size() - - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 - (i.e. L and R can't represent empty matrices) - - L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h - for a definition of sparse vector) - - regularization >= 0 - ensures - - This is just an overload of the cca() function defined above. Except in this - case we take a sparse representation of the input L and R matrices rather than - dense matrices. Therefore, in this case, we interpret L and R as matrices - with L.size() rows, where each row is defined by a sparse vector. So this - function does exactly the same thing as the above cca(). - - Note that you can apply the output transforms to a sparse vector with the - following code: - sparse_matrix_vector_multiply(trans(Ltrans), your_sparse_vector) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename sparse_vector_type, - typename Rand_type, - typename T - > - matrix<T,0,1> cca ( - const random_subset_selector<sparse_vector_type,Rand_type>& L, - const random_subset_selector<sparse_vector_type,Rand_type>& R, - matrix<T>& Ltrans, - matrix<T>& Rtrans, - unsigned long num_correlations, - unsigned long extra_rank = 5, - unsigned long q = 2, - double regularization = 0 - ); - /*! - requires - - num_correlations > 0 - - L.size() == R.size() - - max_index_plus_one(L) > 0 && max_index_plus_one(R) > 0 - (i.e. L and R can't represent empty matrices) - - L and R must contain sparse vectors (see the top of dlib/svm/sparse_vector_abstract.h - for a definition of sparse vector) - - regularization >= 0 - ensures - - returns cca(L.to_std_vector(), R.to_std_vector(), Ltrans, Rtrans, num_correlations, extra_rank, q) - (i.e. this is just a convenience function for calling the cca() routine when - your sparse vectors are contained inside a random_subset_selector rather than - a std::vector) - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_CCA_AbSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/dpca.h b/ml/dlib/dlib/statistics/dpca.h deleted file mode 100644 index cae784682..000000000 --- a/ml/dlib/dlib/statistics/dpca.h +++ /dev/null @@ -1,541 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_DPCA_h_ -#define DLIB_DPCA_h_ - -#include "dpca_abstract.h" -#include <limits> -#include <cmath> -#include "../algs.h" -#include "../matrix.h" -#include <iostream> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class discriminant_pca - { - /*! - INITIAL VALUE - - vect_size == 0 - - total_count == 0 - - between_count == 0 - - within_count == 0 - - between_weight == 1 - - within_weight == 1 - - CONVENTION - - vect_size == in_vector_size() - - total_count == the number of times add_to_total_variance() has been called. - - within_count == the number of times add_to_within_class_variance() has been called. - - between_count == the number of times add_to_between_class_variance() has been called. - - between_weight == between_class_weight() - - within_weight == within_class_weight() - - - if (total_count != 0) - - total_sum == the sum of all vectors given to add_to_total_variance() - - the covariance of all the elements given to add_to_total_variance() is given - by: - - let avg == total_sum/total_count - - covariance == total_cov/total_count - avg*trans(avg) - - if (within_count != 0) - - within_cov/within_count == the normalized within class scatter matrix - - if (between_count != 0) - - between_cov/between_count == the normalized between class scatter matrix - !*/ - - public: - - struct discriminant_pca_error : public error - { - discriminant_pca_error(const std::string& message): error(message) {} - }; - - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef typename matrix_type::layout_type layout_type; - typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; - typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; - - discriminant_pca ( - ) - { - clear(); - } - - void clear( - ) - { - total_count = 0; - between_count = 0; - within_count = 0; - - vect_size = 0; - - - between_weight = 1; - within_weight = 1; - - - total_sum.set_size(0); - between_cov.set_size(0,0); - total_cov.set_size(0,0); - within_cov.set_size(0,0); - } - - long in_vector_size ( - ) const - { - return vect_size; - } - - void set_within_class_weight ( - scalar_type weight - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(weight >= 0, - "\t void discriminant_pca::set_within_class_weight()" - << "\n\t You can't use negative weight values" - << "\n\t weight: " << weight - << "\n\t this: " << this - ); - - within_weight = weight; - } - - scalar_type within_class_weight ( - ) const - { - return within_weight; - } - - void set_between_class_weight ( - scalar_type weight - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(weight >= 0, - "\t void discriminant_pca::set_between_class_weight()" - << "\n\t You can't use negative weight values" - << "\n\t weight: " << weight - << "\n\t this: " << this - ); - - between_weight = weight; - } - - scalar_type between_class_weight ( - ) const - { - return between_weight; - } - - template <typename EXP1, typename EXP2> - void add_to_within_class_variance( - const matrix_exp<EXP1>& x, - const matrix_exp<EXP2>& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && - x.size() == y.size() && - (in_vector_size() == 0 || x.size() == in_vector_size()), - "\t void discriminant_pca::add_to_within_class_variance()" - << "\n\t Invalid inputs were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t is_col_vector(y): " << is_col_vector(y) - << "\n\t x.size(): " << x.size() - << "\n\t y.size(): " << y.size() - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t this: " << this - ); - - vect_size = x.size(); - if (within_count == 0) - { - within_cov = (x-y)*trans(x-y); - } - else - { - within_cov += (x-y)*trans(x-y); - } - ++within_count; - } - - template <typename EXP1, typename EXP2> - void add_to_between_class_variance( - const matrix_exp<EXP1>& x, - const matrix_exp<EXP2>& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) && - x.size() == y.size() && - (in_vector_size() == 0 || x.size() == in_vector_size()), - "\t void discriminant_pca::add_to_between_class_variance()" - << "\n\t Invalid inputs were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t is_col_vector(y): " << is_col_vector(y) - << "\n\t x.size(): " << x.size() - << "\n\t y.size(): " << y.size() - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t this: " << this - ); - - vect_size = x.size(); - if (between_count == 0) - { - between_cov = (x-y)*trans(x-y); - } - else - { - between_cov += (x-y)*trans(x-y); - } - ++between_count; - } - - template <typename EXP> - void add_to_total_variance( - const matrix_exp<EXP>& x - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()), - "\t void discriminant_pca::add_to_total_variance()" - << "\n\t Invalid inputs were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t x.size(): " << x.size() - << "\n\t this: " << this - ); - - vect_size = x.size(); - if (total_count == 0) - { - total_cov = x*trans(x); - total_sum = x; - } - else - { - total_cov += x*trans(x); - total_sum += x; - } - ++total_count; - } - - const general_matrix dpca_matrix ( - const double eps = 0.99 - ) const - { - general_matrix dpca_mat; - general_matrix eigenvalues; - dpca_matrix(dpca_mat, eigenvalues, eps); - return dpca_mat; - } - - const general_matrix dpca_matrix_of_size ( - const long num_rows - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(), - "\t general_matrix discriminant_pca::dpca_matrix_of_size()" - << "\n\t Invalid inputs were given to this function" - << "\n\t num_rows: " << num_rows - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t this: " << this - ); - - general_matrix dpca_mat; - general_matrix eigenvalues; - dpca_matrix_of_size(dpca_mat, eigenvalues, num_rows); - return dpca_mat; - } - - void dpca_matrix ( - general_matrix& dpca_mat, - general_matrix& eigenvalues, - const double eps = 0.99 - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0, - "\t void discriminant_pca::dpca_matrix()" - << "\n\t Invalid inputs were given to this function" - << "\n\t eps: " << eps - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t this: " << this - ); - - compute_dpca_matrix(dpca_mat, eigenvalues, eps, 0); - } - - void dpca_matrix_of_size ( - general_matrix& dpca_mat, - general_matrix& eigenvalues, - const long num_rows - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(), - "\t general_matrix discriminant_pca::dpca_matrix_of_size()" - << "\n\t Invalid inputs were given to this function" - << "\n\t num_rows: " << num_rows - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t this: " << this - ); - - compute_dpca_matrix(dpca_mat, eigenvalues, 1, num_rows); - } - - void swap ( - discriminant_pca& item - ) - { - using std::swap; - swap(total_cov, item.total_cov); - swap(total_sum, item.total_sum); - swap(total_count, item.total_count); - swap(vect_size, item.vect_size); - swap(between_cov, item.between_cov); - - swap(between_count, item.between_count); - swap(between_weight, item.between_weight); - swap(within_cov, item.within_cov); - swap(within_count, item.within_count); - swap(within_weight, item.within_weight); - } - - friend void deserialize ( - discriminant_pca& item, - std::istream& in - ) - { - deserialize( item.total_cov, in); - deserialize( item.total_sum, in); - deserialize( item.total_count, in); - deserialize( item.vect_size, in); - deserialize( item.between_cov, in); - deserialize( item.between_count, in); - deserialize( item.between_weight, in); - deserialize( item.within_cov, in); - deserialize( item.within_count, in); - deserialize( item.within_weight, in); - } - - friend void serialize ( - const discriminant_pca& item, - std::ostream& out - ) - { - serialize( item.total_cov, out); - serialize( item.total_sum, out); - serialize( item.total_count, out); - serialize( item.vect_size, out); - serialize( item.between_cov, out); - serialize( item.between_count, out); - serialize( item.between_weight, out); - serialize( item.within_cov, out); - serialize( item.within_count, out); - serialize( item.within_weight, out); - } - - discriminant_pca operator+ ( - const discriminant_pca& item - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) && - between_class_weight() == item.between_class_weight() && - within_class_weight() == item.within_class_weight(), - "\t discriminant_pca discriminant_pca::operator+()" - << "\n\t The two discriminant_pca objects being added must have compatible parameters" - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t item.in_vector_size(): " << item.in_vector_size() - << "\n\t between_class_weight(): " << between_class_weight() - << "\n\t item.between_class_weight(): " << item.between_class_weight() - << "\n\t within_class_weight(): " << within_class_weight() - << "\n\t item.within_class_weight(): " << item.within_class_weight() - << "\n\t this: " << this - ); - - discriminant_pca temp(item); - - // We need to make sure to ignore empty matrices. That's what these if statements - // are for. - - if (total_count != 0 && temp.total_count != 0) - { - temp.total_cov += total_cov; - temp.total_sum += total_sum; - temp.total_count += total_count; - } - else if (total_count != 0) - { - temp.total_cov = total_cov; - temp.total_sum = total_sum; - temp.total_count = total_count; - } - - if (between_count != 0 && temp.between_count != 0) - { - temp.between_cov += between_cov; - temp.between_count += between_count; - } - else if (between_count != 0) - { - temp.between_cov = between_cov; - temp.between_count = between_count; - } - - if (within_count != 0 && temp.within_count != 0) - { - temp.within_cov += within_cov; - temp.within_count += within_count; - } - else if (within_count != 0) - { - temp.within_cov = within_cov; - temp.within_count = within_count; - } - - return temp; - } - - discriminant_pca& operator+= ( - const discriminant_pca& rhs - ) - { - (*this + rhs).swap(*this); - return *this; - } - - private: - - void compute_dpca_matrix ( - general_matrix& dpca_mat, - general_matrix& eigenvalues, - const double eps, - long num_rows - ) const - { - general_matrix cov; - - // now combine the three measures of variance into a single matrix by using the - // within_weight and between_weight weights. - cov = get_total_covariance_matrix(); - if (within_count != 0) - cov -= within_weight*within_cov/within_count; - if (between_count != 0) - cov += between_weight*between_cov/between_count; - - - eigenvalue_decomposition<general_matrix> eig(make_symmetric(cov)); - - eigenvalues = eig.get_real_eigenvalues(); - dpca_mat = eig.get_pseudo_v(); - - // sort the eigenvalues and eigenvectors so that the biggest eigenvalues come first - rsort_columns(dpca_mat, eigenvalues); - - long num_vectors = 0; - if (num_rows == 0) - { - // Some of the eigenvalues might be negative. So first lets zero those out - // so they won't get considered. - eigenvalues = pointwise_multiply(eigenvalues > 0, eigenvalues); - // figure out how many eigenvectors we want in our dpca matrix - const double thresh = sum(eigenvalues)*eps; - double total = 0; - for (long r = 0; r < eigenvalues.size() && total < thresh; ++r) - { - // Don't even think about looking at eigenvalues that are 0. If we go this - // far then we have all we need. - if (eigenvalues(r) == 0) - break; - - ++num_vectors; - total += eigenvalues(r); - } - - if (num_vectors == 0) - throw discriminant_pca_error("While performing discriminant_pca, all eigenvalues were negative or 0"); - } - else - { - num_vectors = num_rows; - } - - - // So now we know we want to use num_vectors of the first eigenvectors. So - // pull those out and discard the rest. - dpca_mat = trans(colm(dpca_mat,range(0,num_vectors-1))); - - // also clip off the eigenvalues we aren't using - eigenvalues = rowm(eigenvalues, range(0,num_vectors-1)); - - } - - general_matrix get_total_covariance_matrix ( - ) const - /*! - ensures - - returns the covariance matrix of all the data given to the add_to_total_variance() - !*/ - { - // if we don't even know the dimensionality of the vectors we are dealing - // with then just return an empty matrix - if (vect_size == 0) - return general_matrix(); - - // we know the vector size but we have zero total covariance. - if (total_count == 0) - { - general_matrix temp(vect_size,vect_size); - temp = 0; - return temp; - } - - // In this case we actually have something to make a total covariance matrix out of. - // So do that. - column_matrix avg = total_sum/total_count; - - return total_cov/total_count - avg*trans(avg); - } - - general_matrix total_cov; - column_matrix total_sum; - scalar_type total_count; - - long vect_size; - - general_matrix between_cov; - scalar_type between_count; - scalar_type between_weight; - - general_matrix within_cov; - scalar_type within_count; - scalar_type within_weight; - }; - - template < - typename matrix_type - > - inline void swap ( - discriminant_pca<matrix_type>& a, - discriminant_pca<matrix_type>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DPCA_h_ - - diff --git a/ml/dlib/dlib/statistics/dpca_abstract.h b/ml/dlib/dlib/statistics/dpca_abstract.h deleted file mode 100644 index d9eef635b..000000000 --- a/ml/dlib/dlib/statistics/dpca_abstract.h +++ /dev/null @@ -1,365 +0,0 @@ -// Copyright (C) 2009 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_DPCA_ABSTRaCT_ -#ifdef DLIB_DPCA_ABSTRaCT_ - -#include <limits> -#include <cmath> -#include "../matrix/matrix_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class discriminant_pca - { - /*! - REQUIREMENTS ON matrix_type - Must be some type of dlib::matrix. - - INITIAL VALUE - - in_vector_size() == 0 - - between_class_weight() == 1 - - within_class_weight() == 1 - - WHAT THIS OBJECT REPRESENTS - This object implements the Discriminant PCA technique described in the paper: - A New Discriminant Principal Component Analysis Method with Partial Supervision (2009) - by Dan Sun and Daoqiang Zhang - - This algorithm is basically a straightforward generalization of the classical PCA - technique to handle partially labeled data. It is useful if you want to learn a linear - dimensionality reduction rule using a bunch of data that is partially labeled. - - It functions by estimating three different scatter matrices. The first is the total scatter - matrix St (i.e. the total data covariance matrix), the second is the between class scatter - matrix Sb (basically a measure of the variance between data of different classes) and the - third is the within class scatter matrix Sw (a measure of the variance of data within the - same classes). - - Once these three matrices are estimated they are combined according to the following equation: - S = St + a*Sb - b*Sw - Where a and b are user supplied weights. Then the largest eigenvalues of the S matrix are - computed and their associated eigenvectors are returned as the output of this algorithm. - That is, the desired linear dimensionality reduction is given by the matrix with these - eigenvectors stored in its rows. - - Note that if a and b are set to 0 (or no labeled data is provided) then the output transformation - matrix is the same as the one produced by the classical PCA algorithm. - !*/ - - public: - - struct discriminant_pca_error : public error; - /*! - This exception is thrown if there is some error that prevents us from creating - a DPCA matrix. - !*/ - - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef typename matrix_type::layout_type layout_type; - typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; - typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; - - discriminant_pca ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear( - ); - /*! - ensures - - #*this has its initial value - !*/ - - long in_vector_size ( - ) const; - /*! - ensures - - if (this object has been presented with any input vectors) then - - returns the dimension of the column vectors used with this object - - else - - returns 0 - !*/ - - void set_within_class_weight ( - scalar_type weight - ); - /*! - requires - - weight >= 0 - ensures - - #within_class_weight() == weight - !*/ - - scalar_type within_class_weight ( - ) const; - /*! - ensures - - returns the weight used when combining the within class scatter matrix with - the other scatter matrices. - !*/ - - void set_between_class_weight ( - scalar_type weight - ); - /*! - requires - - weight >= 0 - ensures - - #between_class_weight() == weight - !*/ - - scalar_type between_class_weight ( - ) const; - /*! - ensures - - returns the weight used when combining the between class scatter matrix with - the other scatter matrices. - !*/ - - void add_to_within_class_variance( - const matrix_exp& x, - const matrix_exp& y - ); - /*! - requires - - is_col_vector(x) == true - - is_col_vector(y) == true - - x.size() == y.size() - - if (in_vector_size() != 0) then - - x.size() == y.size() == in_vector_size() - ensures - - #in_vector_size() == x.size() - - Adds (x-y)*trans(x-y) to the within class scatter matrix. - (i.e. the direction given by (x-y) is recorded as being a direction associated - with within class variance and is therefore unimportant and will be weighted - less in the final dimensionality reduction) - !*/ - - void add_to_between_class_variance( - const matrix_exp& x, - const matrix_exp& y - ); - /*! - requires - - is_col_vector(x) == true - - is_col_vector(y) == true - - x.size() == y.size() - - if (in_vector_size() != 0) then - - x.size() == y.size() == in_vector_size() - ensures - - #in_vector_size() == x.size() - - Adds (x-y)*trans(x-y) to the between class scatter matrix. - (i.e. the direction given by (x-y) is recorded as being a direction associated - with between class variance and is therefore important and will be weighted - higher in the final dimensionality reduction) - !*/ - - void add_to_total_variance( - const matrix_exp& x - ); - /*! - requires - - is_col_vector(x) == true - - if (in_vector_size() != 0) then - - x.size() == in_vector_size() - ensures - - #in_vector_size() == x.size() - - let M denote the centroid (or mean) of all the data. Then this function - Adds (x-M)*trans(x-M) to the total scatter matrix. - (i.e. the direction given by (x-M) is recorded as being a direction associated - with unlabeled variance and is therefore of default importance and will be weighted - as described in the discriminant_pca class description.) - !*/ - - const general_matrix dpca_matrix ( - const double eps = 0.99 - ) const; - /*! - requires - - 0 < eps <= 1 - - in_vector_size() != 0 - (i.e. you have to have given this object some data) - ensures - - computes and returns the matrix MAT given by dpca_matrix(MAT,eigen,eps). - That is, this function returns the dpca_matrix computed by the function - defined below. - - Note that MAT is the desired linear transformation matrix. That is, - multiplying a vector by MAT performs the desired linear dimensionality reduction. - throws - - discriminant_pca_error - This exception is thrown if we are unable to create the dpca_matrix for some - reason. For example, if only within class examples have been given or - within_class_weight() is very large then all eigenvalues will be negative and - that prevents this algorithm from working properly. - !*/ - - void dpca_matrix ( - general_matrix& dpca_mat, - general_matrix& eigenvalues, - const double eps = 0.99 - ) const; - /*! - requires - - 0 < eps <= 1 - - in_vector_size() != 0 - (i.e. you have to have given this object some data) - ensures - - is_col_vector(#eigenvalues) == true - - #dpca_mat.nr() == eigenvalues.size() - - #dpca_mat.nc() == in_vector_size() - - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described - in the class description and its eigenvalue is given by eigenvalues(i). - - all values in #eigenvalues are > 0. Moreover, the eigenvalues are in - sorted order with the largest eigenvalue stored at eigenvalues(0). - - (#dpca_mat)*trans(#dpca_mat) == identity_matrix. - (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually - orthogonal) - - Note that #dpca_mat is the desired linear transformation matrix. That is, - multiplying a vector by #dpca_mat performs the desired linear dimensionality - reduction. - - sum(#eigenvalues) will be equal to about eps times the total sum of all - positive eigenvalues in the S matrix described in this class's description. - This means that eps is a number that controls how "lossy" the dimensionality - reduction will be. Large values of eps result in more output dimensions - while smaller values result in fewer. - throws - - discriminant_pca_error - This exception is thrown if we are unable to create the dpca_matrix for some - reason. For example, if only within class examples have been given or - within_class_weight() is very large then all eigenvalues will be negative and - that prevents this algorithm from working properly. - !*/ - - const general_matrix dpca_matrix_of_size ( - const long num_rows - ); - /*! - requires - - 0 < num_rows <= in_vector_size() - ensures - - computes and returns the matrix MAT given by dpca_matrix_of_size(MAT,eigen,num_rows). - That is, this function returns the dpca_matrix computed by the function - defined below. - - Note that MAT is the desired linear transformation matrix. That is, - multiplying a vector by MAT performs the desired linear dimensionality - reduction to num_rows dimensions. - !*/ - - void dpca_matrix_of_size ( - general_matrix& dpca_mat, - general_matrix& eigenvalues, - const long num_rows - ); - /*! - requires - - 0 < num_rows <= in_vector_size() - ensures - - is_col_vector(#eigenvalues) == true - - #dpca_mat.nr() == eigenvalues.size() - - #dpca_mat.nr() == num_rows - - #dpca_mat.nc() == in_vector_size() - - rowm(#dpca_mat,i) represents the ith eigenvector of the S matrix described - in the class description and its eigenvalue is given by eigenvalues(i). - - The values in #eigenvalues might be positive or negative. Additionally, the - eigenvalues are in sorted order with the largest eigenvalue stored at - eigenvalues(0). - - (#dpca_mat)*trans(#dpca_mat) == identity_matrix. - (i.e. the rows of the dpca_matrix are all unit length vectors and are mutually - orthogonal) - - Note that #dpca_mat is the desired linear transformation matrix. That is, - multiplying a vector by #dpca_mat performs the desired linear dimensionality - reduction to num_rows dimensions. - !*/ - - discriminant_pca operator+ ( - const discriminant_pca& item - ) const; - /*! - requires - - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size() - (i.e. the in_vector_size() of *this and item must match or one must be zero) - - between_class_weight() == item.between_class_weight() - - within_class_weight() == item.within_class_weight() - ensures - - returns a new discriminant_pca object that represents the combination of all - the measurements given to *this and item. That is, this function returns a - discriminant_pca object, R, that is equivalent to what you would obtain if all - modifying calls (e.g. the add_to_*() functions) to *this and item had instead - been done to R. - !*/ - - discriminant_pca& operator+= ( - const discriminant_pca& rhs - ); - /*! - requires - - in_vector_size() == 0 || rhs.in_vector_size() == 0 || in_vector_size() == rhs.in_vector_size() - (i.e. the in_vector_size() of *this and rhs must match or one must be zero) - - between_class_weight() == rhs.between_class_weight() - - within_class_weight() == rhs.within_class_weight() - ensures - - #*this == *item + rhs - - returns #*this - !*/ - - void swap ( - discriminant_pca& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - inline void swap ( - discriminant_pca<matrix_type>& a, - discriminant_pca<matrix_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename matrix_type, - > - void deserialize ( - discriminant_pca<matrix_type>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - - template < - typename matrix_type, - > - void serialize ( - const discriminant_pca<matrix_type>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_DPCA_ABSTRaCT_ - diff --git a/ml/dlib/dlib/statistics/image_feature_sampling.h b/ml/dlib/dlib/statistics/image_feature_sampling.h deleted file mode 100644 index f04f9926e..000000000 --- a/ml/dlib/dlib/statistics/image_feature_sampling.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_IMAGE_FEATURE_SaMPLING_Hh_ -#define DLIB_IMAGE_FEATURE_SaMPLING_Hh_ - -#include "image_feature_sampling_abstract.h" -#include "../statistics.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename feature_extractor_type, - typename pyramid_type - > - random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features ( - const image_array_type& images, - const pyramid_type& pyr, - const feature_extractor_type& fe_, - unsigned long num - ) - { - feature_extractor_type fe; - fe.copy_configuration(fe_); - random_subset_selector<typename feature_extractor_type::descriptor_type> basis; - basis.set_max_size(num); - - typedef typename image_array_type::type image_type; - image_type temp_img, temp_img2; - - for (unsigned long i = 0; i < images.size(); ++i) - { - bool at_pyramid_top = true; - while (true) - { - if (at_pyramid_top) - fe.load(images[i]); - else - fe.load(temp_img); - - if (fe.size() == 0) - break; - - for (long r = 0; r < fe.nr(); ++r) - { - for (long c = 0; c < fe.nc(); ++c) - { - if (basis.next_add_accepts()) - { - basis.add(fe(r,c)); - } - else - { - basis.add(); - } - } - } - - if (at_pyramid_top) - { - at_pyramid_top = false; - pyr(images[i], temp_img); - } - else - { - pyr(temp_img, temp_img2); - swap(temp_img2,temp_img); - } - } - } - return basis; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_IMAGE_FEATURE_SaMPLING_Hh_ - diff --git a/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h b/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h deleted file mode 100644 index b51ef5423..000000000 --- a/ml/dlib/dlib/statistics/image_feature_sampling_abstract.h +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (C) 2011 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_ -#ifdef DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_ - -#include "random_subset_selector_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename image_array_type, - typename feature_extractor_type, - typename pyramid_type - > - random_subset_selector<typename feature_extractor_type::descriptor_type> randomly_sample_image_features ( - const image_array_type& images, - const pyramid_type& pyr, - const feature_extractor_type& fe, - unsigned long num - ); - /*! - requires - - pyramid_type == a type compatible with the image pyramid objects defined - in dlib/image_transforms/image_pyramid_abstract.h - - feature_extractor_type == a local image feature extractor type such as the - dlib::hog_image - - image_array_type == an implementation of dlib/array/array_kernel_abstract.h - and it must contain image objects which can be passed to pyr() and fe.load() - and are swappable by global swap(). - ensures - - creates an image pyramid for each image in images and performs feature - extraction on each pyramid level. Then selects a random subsample of at - most num local feature vectors and returns it. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_IMAGE_FEATURE_SaMPLING_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/lda.h b/ml/dlib/dlib/statistics/lda.h deleted file mode 100644 index 38de3fd1e..000000000 --- a/ml/dlib/dlib/statistics/lda.h +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_LDA_Hh_ -#define DLIB_LDA_Hh_ - -#include "lda_abstract.h" -#include "../algs.h" -#include <map> -#include "../matrix.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - namespace impl - { - - inline std::map<unsigned long,unsigned long> make_class_labels( - const std::vector<unsigned long>& row_labels - ) - { - std::map<unsigned long,unsigned long> class_labels; - for (unsigned long i = 0; i < row_labels.size(); ++i) - { - const unsigned long next = class_labels.size(); - if (class_labels.count(row_labels[i]) == 0) - class_labels[row_labels[i]] = next; - } - return class_labels; - } - - // ------------------------------------------------------------------------------------ - - template < - typename T - > - matrix<T,0,1> center_matrix ( - matrix<T>& X - ) - { - matrix<T,1> mean; - for (long r = 0; r < X.nr(); ++r) - mean += rowm(X,r); - mean /= X.nr(); - - for (long r = 0; r < X.nr(); ++r) - set_rowm(X,r) -= mean; - - return trans(mean); - } - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void compute_lda_transform ( - matrix<T>& X, - matrix<T,0,1>& mean, - const std::vector<unsigned long>& row_labels, - unsigned long lda_dims = 500, - unsigned long extra_pca_dims = 200 - ) - { - std::map<unsigned long,unsigned long> class_labels = impl::make_class_labels(row_labels); - // LDA can only give out at most class_labels.size()-1 dimensions so don't try to - // compute more than that. - lda_dims = std::min<unsigned long>(lda_dims, class_labels.size()-1); - - // make sure requires clause is not broken - DLIB_CASSERT(class_labels.size() > 1, - "\t void compute_lda_transform()" - << "\n\t You can't call this function if the number of distinct class labels is less than 2." - ); - DLIB_CASSERT(X.size() != 0 && (long)row_labels.size() == X.nr() && lda_dims != 0, - "\t void compute_lda_transform()" - << "\n\t Invalid inputs were given to this function." - << "\n\t X.size(): " << X.size() - << "\n\t row_labels.size(): " << row_labels.size() - << "\n\t lda_dims: " << lda_dims - ); - - - mean = impl::center_matrix(X); - // Do PCA to reduce dims - matrix<T> pu,pw,pv; - svd_fast(X, pu, pw, pv, lda_dims+extra_pca_dims, 4); - pu.set_size(0,0); // free RAM, we don't need pu. - X = X*pv; - - - matrix<T> class_means(class_labels.size(), X.nc()); - class_means = 0; - matrix<T,0,1> class_counts(class_labels.size()); - class_counts = 0; - - // First compute the means of each class - for (unsigned long i = 0; i < row_labels.size(); ++i) - { - const unsigned long class_idx = class_labels[row_labels[i]]; - set_rowm(class_means,class_idx) += rowm(X,i); - class_counts(class_idx)++; - } - class_means = inv(diagm(class_counts))*class_means; - // subtract means from the data - for (unsigned long i = 0; i < row_labels.size(); ++i) - { - const unsigned long class_idx = class_labels[row_labels[i]]; - set_rowm(X,i) -= rowm(class_means,class_idx); - } - - // Note that we are using the formulas from the paper Using Discriminant - // Eigenfeatures for Image Retrieval by Swets and Weng. - matrix<T> Sw = trans(X)*X; - matrix<T> Sb = trans(class_means)*class_means; - matrix<T> A, H; - matrix<T,0,1> W; - svd3(Sw, A, W, H); - W = sqrt(W); - W = reciprocal(lowerbound(W,max(W)*1e-5)); - A = trans(H*diagm(W))*Sb*H*diagm(W); - matrix<T> v,s,u; - svd3(A, v, s, u); - matrix<T> tform = H*diagm(W)*u; - // pick out only the number of dimensions we are supposed to for the output, unless - // we should just keep them all, then don't do anything. - if ((long)lda_dims <= tform.nc()) - { - rsort_columns(tform, s); - tform = colm(tform, range(0, lda_dims-1)); - } - - X = trans(pv*tform); - mean = X*mean; - } - -// ---------------------------------------------------------------------------------------- - - inline std::pair<double,double> equal_error_rate ( - const std::vector<double>& low_vals, - const std::vector<double>& high_vals - ) - { - std::vector<std::pair<double,int> > temp; - temp.reserve(low_vals.size()+high_vals.size()); - for (unsigned long i = 0; i < low_vals.size(); ++i) - temp.push_back(std::make_pair(low_vals[i], -1)); - for (unsigned long i = 0; i < high_vals.size(); ++i) - temp.push_back(std::make_pair(high_vals[i], +1)); - - std::sort(temp.begin(), temp.end()); - - if (temp.size() == 0) - return std::make_pair(0,0); - - double thresh = temp[0].first; - - unsigned long num_low_wrong = low_vals.size(); - unsigned long num_high_wrong = 0; - double low_error = num_low_wrong/(double)low_vals.size(); - double high_error = num_high_wrong/(double)high_vals.size(); - for (unsigned long i = 0; i < temp.size() && high_error < low_error; ++i) - { - thresh = temp[i].first; - if (temp[i].second > 0) - { - num_high_wrong++; - high_error = num_high_wrong/(double)high_vals.size(); - } - else - { - num_low_wrong--; - low_error = num_low_wrong/(double)low_vals.size(); - } - } - - return std::make_pair((low_error+high_error)/2, thresh); - } - -// ---------------------------------------------------------------------------------------- - - struct roc_point - { - double true_positive_rate; - double false_positive_rate; - double detection_threshold; - }; - - inline std::vector<roc_point> compute_roc_curve ( - const std::vector<double>& true_detections, - const std::vector<double>& false_detections - ) - { - DLIB_CASSERT(true_detections.size() != 0); - DLIB_CASSERT(false_detections.size() != 0); - - std::vector<std::pair<double,int> > temp; - temp.reserve(true_detections.size()+false_detections.size()); - for (unsigned long i = 0; i < true_detections.size(); ++i) - temp.push_back(std::make_pair(true_detections[i], +1)); - for (unsigned long i = 0; i < false_detections.size(); ++i) - temp.push_back(std::make_pair(false_detections[i], -1)); - - std::sort(temp.rbegin(), temp.rend()); - - - std::vector<roc_point> roc_curve; - roc_curve.reserve(temp.size()); - - double num_false_included = 0; - double num_true_included = 0; - for (unsigned long i = 0; i < temp.size(); ++i) - { - if (temp[i].second > 0) - num_true_included++; - else - num_false_included++; - - roc_point p; - p.true_positive_rate = num_true_included/true_detections.size(); - p.false_positive_rate = num_false_included/false_detections.size(); - p.detection_threshold = temp[i].first; - roc_curve.push_back(p); - } - - return roc_curve; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LDA_Hh_ - diff --git a/ml/dlib/dlib/statistics/lda_abstract.h b/ml/dlib/dlib/statistics/lda_abstract.h deleted file mode 100644 index ab9fd7a32..000000000 --- a/ml/dlib/dlib/statistics/lda_abstract.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright (C) 2014 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_LDA_ABSTRACT_Hh_ -#ifdef DLIB_LDA_ABSTRACT_Hh_ - -#include <map> -#include "../matrix.h" -#include <vector> - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - void compute_lda_transform ( - matrix<T>& X, - matrix<T,0,1>& M, - const std::vector<unsigned long>& row_labels, - unsigned long lda_dims = 500, - unsigned long extra_pca_dims = 200 - ); - /*! - requires - - X.size() != 0 - - row_labels.size() == X.nr() - - The number of distinct values in row_labels > 1 - - lda_dims != 0 - ensures - - We interpret X as a collection X.nr() of input vectors, where each row of X - is one of the vectors. - - We interpret row_labels[i] as the label of the vector rowm(X,i). - - This function performs the dimensionality reducing version of linear - discriminant analysis. That is, you give it a set of labeled vectors and it - returns a linear transform that maps the input vectors into a new space that - is good for distinguishing between the different classes. In particular, - this function finds matrices Z and M such that: - - Given an input vector x, Z*x-M, is the transformed version of x. That is, - Z*x-M maps x into a space where x vectors that share the same class label - are near each other. - - Z*x-M results in the transformed vectors having zero expected mean. - - Z.nr() <= lda_dims - (it might be less than lda_dims if there are not enough distinct class - labels to support lda_dims dimensions). - - Z.nc() == X.nc() - - We overwrite the input matrix X and store Z in it. Therefore, the - outputs of this function are in X and M. - - In order to deal with very high dimensional inputs, we perform PCA internally - to map the input vectors into a space of at most lda_dims+extra_pca_dims - prior to performing LDA. - !*/ - -// ---------------------------------------------------------------------------------------- - - std::pair<double,double> equal_error_rate ( - const std::vector<double>& low_vals, - const std::vector<double>& high_vals - ); - /*! - ensures - - This function finds a threshold T that best separates the elements of - low_vals from high_vals by selecting the threshold with equal error rate. In - particular, we try to pick a threshold T such that: - - for all valid i: - - high_vals[i] >= T - - for all valid i: - - low_vals[i] < T - Where the best T is determined such that the fraction of low_vals >= T is the - same as the fraction of high_vals < T. - - Let ERR == the equal error rate. I.e. the fraction of times low_vals >= T - and high_vals < T. Note that 0 <= ERR <= 1. - - returns make_pair(ERR,T) - !*/ - -// ---------------------------------------------------------------------------------------- - - struct roc_point - { - double true_positive_rate; - double false_positive_rate; - double detection_threshold; - }; - - std::vector<roc_point> compute_roc_curve ( - const std::vector<double>& true_detections, - const std::vector<double>& false_detections - ); - /*! - requires - - true_detections.size() != 0 - - false_detections.size() != 0 - ensures - - This function computes the ROC curve (receiver operating characteristic) - curve of the given data. Therefore, we interpret true_detections as - containing detection scores for a bunch of true detections and - false_detections as detection scores from a bunch of false detections. A - perfect detector would always give higher scores to true detections than to - false detections, resulting in a true positive rate of 1 and a false positive - rate of 0, for some appropriate detection threshold. - - Returns an array, ROC, such that: - - ROC.size() == true_detections.size()+false_detections.size() - - for all valid i: - - If you were to accept all detections with a score >= ROC[i].detection_threshold - then you would obtain a true positive rate of ROC[i].true_positive_rate and a - false positive rate of ROC[i].false_positive_rate. - - ROC is ordered such that low detection rates come first. That is, the - curve is swept from a high detection threshold to a low threshold. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_LDA_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/random_subset_selector.h b/ml/dlib/dlib/statistics/random_subset_selector.h deleted file mode 100644 index 17492363d..000000000 --- a/ml/dlib/dlib/statistics/random_subset_selector.h +++ /dev/null @@ -1,372 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_RANDOM_SUBSeT_SELECTOR_H_ -#define DLIB_RANDOM_SUBSeT_SELECTOR_H_ - -#include "random_subset_selector_abstract.h" -#include "../rand.h" -#include <vector> -#include "../algs.h" -#include "../string.h" -#include "../serialize.h" -#include "../matrix/matrix_mat.h" -#include <iostream> - -namespace dlib -{ - template < - typename T, - typename Rand_type = dlib::rand - > - class random_subset_selector - { - /*! - INITIAL VALUE - - _max_size == 0 - - items.size() == 0 - - count == 0 - - _next_add_accepts == false - - CONVENTION - - count == the number of times add() has been called since the last - time this object was empty. - - items.size() == size() - - max_size() == _max_size - - next_add_accepts() == _next_add_accepts - !*/ - public: - typedef T type; - typedef T value_type; - typedef default_memory_manager mem_manager_type; - typedef Rand_type rand_type; - - typedef typename std::vector<T>::iterator iterator; - typedef typename std::vector<T>::const_iterator const_iterator; - - - random_subset_selector ( - ) - { - _max_size = 0; - make_empty(); - } - - void set_seed(const std::string& value) - { - rnd.set_seed(value); - } - - void make_empty ( - ) - { - items.resize(0); - count = 0; - update_next_add_accepts(); - } - - const std::vector<T>& to_std_vector( - ) const { return items; } - - size_t size ( - ) const - { - return items.size(); - } - - void set_max_size ( - unsigned long new_max_size - ) - { - items.reserve(new_max_size); - make_empty(); - _max_size = new_max_size; - update_next_add_accepts(); - } - - unsigned long max_size ( - ) const - { - return _max_size; - } - - T& operator[] ( - unsigned long idx - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(idx < size(), - "\tvoid random_subset_selector::operator[]()" - << "\n\t idx is out of range" - << "\n\t idx: " << idx - << "\n\t size(): " << size() - << "\n\t this: " << this - ); - - return items[idx]; - } - - const T& operator[] ( - unsigned long idx - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(idx < size(), - "\tvoid random_subset_selector::operator[]()" - << "\n\t idx is out of range" - << "\n\t idx: " << idx - << "\n\t size(): " << size() - << "\n\t this: " << this - ); - - return items[idx]; - } - - iterator begin() { return items.begin(); } - const_iterator begin() const { return items.begin(); } - iterator end() { return items.end(); } - const_iterator end() const { return items.end(); } - - bool next_add_accepts ( - ) const - { - return _next_add_accepts; - } - - void add ( - const T& new_item - ) - { - if (items.size() < _max_size) - { - items.push_back(new_item); - // swap into a random place - exchange(items[rnd.get_random_32bit_number()%items.size()], items.back()); - } - else if (_next_add_accepts) - { - // pick a random element of items and replace it. - items[rnd.get_random_32bit_number()%items.size()] = new_item; - } - - update_next_add_accepts(); - ++count; - } - - void add ( - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(next_add_accepts() == false, - "\tvoid random_subset_selector::add()" - << "\n\t You should be calling the version of add() that takes an argument" - << "\n\t this: " << this - ); - - update_next_add_accepts(); - ++count; - } - - void swap ( - random_subset_selector& a - ) - { - items.swap(a.items); - std::swap(_max_size, a._max_size); - std::swap(count, a.count); - rnd.swap(a.rnd); - std::swap(_next_add_accepts, a._next_add_accepts); - } - - template <typename T1, typename T2> - friend void serialize ( - const random_subset_selector<T1,T2>& item, - std::ostream& out - ); - - template <typename T1, typename T2> - friend void deserialize ( - random_subset_selector<T1,T2>& item, - std::istream& in - ); - - private: - - void update_next_add_accepts ( - ) - { - if (items.size() < _max_size) - { - _next_add_accepts = true; - } - else if (_max_size == 0) - { - _next_add_accepts = false; - } - else - { - // At this point each element of items has had an equal chance of being in this object. - // In particular, the probability that each arrived here is currently items.size()/count. - // We need to be able to say that, after this function ends, the probability of any - // particular object ending up in items is items.size()/(count+1). So this means that - // we should decide to add a new item into items with this probability. Also, if we do - // so then we pick one of the current items and replace it at random with the new item. - - // Make me a random 64 bit number. This might seem excessive but I want this object - // to be able to handle an effectively infinite number of calls to add(). So count - // might get very large and we need to deal with that properly. - const unsigned long num1 = rnd.get_random_32bit_number(); - const unsigned long num2 = rnd.get_random_32bit_number(); - uint64 num = num1; - num <<= 32; - num |= num2; - - num %= (count+1); - - _next_add_accepts = (num < items.size()); - } - - } - - std::vector<T> items; - unsigned long _max_size; - uint64 count; - - rand_type rnd; - - bool _next_add_accepts; - - }; - - template < - typename T, - typename rand_type - > - void swap ( - random_subset_selector<T,rand_type>& a, - random_subset_selector<T,rand_type>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - template <typename T1, typename T2> - void serialize ( - const random_subset_selector<T1,T2>& item, - std::ostream& out - ) - { - serialize(item.items, out); - serialize(item._max_size, out); - serialize(item.count, out); - serialize(item.rnd, out); - serialize(item._next_add_accepts, out); - } - - template <typename T1, typename T2> - void deserialize ( - random_subset_selector<T1,T2>& item, - std::istream& in - ) - { - deserialize(item.items, in); - deserialize(item._max_size, in); - deserialize(item.count, in); - deserialize(item.rnd, in); - deserialize(item._next_add_accepts, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - random_subset_selector<T> randomly_subsample ( - const std::vector<T,alloc>& samples, - unsigned long num - ) - { - random_subset_selector<T> subset; - subset.set_max_size(num); - for (unsigned long i = 0; i < samples.size(); ++i) - subset.add(samples[i]); - return subset; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc, - typename U - > - random_subset_selector<T> randomly_subsample ( - const std::vector<T,alloc>& samples, - unsigned long num, - const U& random_seed - ) - { - random_subset_selector<T> subset; - subset.set_seed(cast_to_string(random_seed)); - subset.set_max_size(num); - for (unsigned long i = 0; i < samples.size(); ++i) - subset.add(samples[i]); - return subset; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - random_subset_selector<T> randomly_subsample ( - const random_subset_selector<T>& samples, - unsigned long num - ) - { - random_subset_selector<T> subset; - subset.set_max_size(num); - for (unsigned long i = 0; i < samples.size(); ++i) - subset.add(samples[i]); - return subset; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - random_subset_selector<T> randomly_subsample ( - const random_subset_selector<T>& samples, - unsigned long num, - const U& random_seed - ) - { - random_subset_selector<T> subset; - subset.set_seed(cast_to_string(random_seed)); - subset.set_max_size(num); - for (unsigned long i = 0; i < samples.size(); ++i) - subset.add(samples[i]); - return subset; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - const matrix_op<op_array_to_mat<random_subset_selector<T> > > mat ( - const random_subset_selector<T>& m - ) - { - typedef op_array_to_mat<random_subset_selector<T> > op; - return matrix_op<op>(op(m)); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANDOM_SUBSeT_SELECTOR_H_ - - diff --git a/ml/dlib/dlib/statistics/random_subset_selector_abstract.h b/ml/dlib/dlib/statistics/random_subset_selector_abstract.h deleted file mode 100644 index 96f8b545d..000000000 --- a/ml/dlib/dlib/statistics/random_subset_selector_abstract.h +++ /dev/null @@ -1,388 +0,0 @@ -// Copyright (C) 2010 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_ -#ifdef DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_ - -#include <vector> -#include "../rand/rand_kernel_abstract.h" -#include "../algs.h" -#include "../string.h" - -namespace dlib -{ - template < - typename T, - typename Rand_type = dlib::rand - > - class random_subset_selector - { - /*! - REQUIREMENTS ON T - T must be a copyable type - - REQUIREMENTS ON Rand_type - must be an implementation of dlib/rand/rand_kernel_abstract.h - - INITIAL VALUE - - size() == 0 - - max_size() == 0 - - next_add_accepts() == false - - WHAT THIS OBJECT REPRESENTS - This object is a tool to help you select a random subset of a large body of data. - In particular, it is useful when the body of data is too large to fit into memory. - - So for example, suppose you have 1000000 data samples and you want to select a - random subset of size 1000. Then you could do that as follows: - - random_subset_selector<sample_type> rand_subset; - rand_subset.set_max_size(1000) - for (int i = 0; i < 1000000; ++i) - rand_subset.add( get_next_data_sample()); - - - At the end of the for loop you will have your random subset of 1000 samples. And by - random I mean that each of the 1000000 data samples has an equal chance of ending - up in the rand_subset object. - - - Note that the above example calls get_next_data_sample() for each data sample. This - may be inefficient since most of the data samples are just ignored. An alternative - method that doesn't require you to load each sample can also be used. Consider the - following: - - random_subset_selector<sample_type> rand_subset; - rand_subset.set_max_size(1000) - for (int i = 0; i < 1000000; ++i) - if (rand_subset.next_add_accepts()) - rand_subset.add(get_data_sample(i)); - else - rand_subset.add() - - In the above example we only actually fetch the data sample into memory if we - know that the rand_subset would include it into the random subset. Otherwise, - we can just call the empty add(). - - - Finally, note that the random_subset_selector uses a deterministic pseudo-random - number generator under the hood. Moreover, the default constructor always seeds - the random number generator in the same way. So unless you call set_seed() - each instance of the random_subset_selector will function identically. - !*/ - public: - typedef T type; - typedef T value_type; - typedef default_memory_manager mem_manager_type; - typedef Rand_type rand_type; - - typedef typename std::vector<T>::iterator iterator; - typedef typename std::vector<T>::const_iterator const_iterator; - - random_subset_selector ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void set_seed( - const std::string& value - ); - /*! - ensures - - sets the seed of the random number generator that is embedded in - this object to the given value. - !*/ - - void make_empty ( - ); - /*! - ensures - - #size() == 0 - !*/ - - size_t size ( - ) const; - /*! - ensures - - returns the number of items of type T currently contained in this object - !*/ - - void set_max_size ( - unsigned long new_max_size - ); - /*! - ensures - - #max_size() == new_max_size - - #size() == 0 - !*/ - - unsigned long max_size ( - ) const; - /*! - ensures - - returns the maximum allowable size for this object - !*/ - - T& operator[] ( - unsigned long idx - ); - /*! - requires - - idx < size() - ensures - - returns a non-const reference to the idx'th element of this object - !*/ - - const T& operator[] ( - unsigned long idx - ) const; - /*! - requires - - idx < size() - ensures - - returns a const reference to the idx'th element of this object - !*/ - - bool next_add_accepts ( - ) const; - /*! - ensures - - if (the next call to add(item) will result in item being included - into *this) then - - returns true - - Note that the next item will always be accepted if size() < max_size(). - - else - - returns false - - Note that the next item will never be accepted if max_size() == 0. - !*/ - - void add ( - const T& new_item - ); - /*! - ensures - - if (next_add_accepts()) then - - places new_item into *this object at a random location - - if (size() < max_size()) then - - #size() == size() + 1 - - #next_add_accepts() == The updated information about the acceptance - of the next call to add() - !*/ - - void add ( - ); - /*! - requires - - next_add_accepts() == false - ensures - - This function does nothing but update the value of #next_add_accepts() - !*/ - - iterator begin( - ); - /*! - ensures - - if (size() > 0) then - - returns an iterator referring to the first element in - this container. - - else - - returns end() - !*/ - - const_iterator begin( - ) const; - /*! - ensures - - if (size() > 0) then - - returns a const_iterator referring to the first element in - this container. - - else - - returns end() - !*/ - - iterator end( - ); - /*! - ensures - - returns an iterator that represents one past the end of - this container - !*/ - - const_iterator end( - ) const; - /*! - ensures - - returns an iterator that represents one past the end of - this container - !*/ - - const std::vector<T>& to_std_vector( - ) const; - /*! - ensures - - returns a const reference to the underlying std::vector<T> that contains - all elements in this object. That is, this function returns a vector, V, - which has the following properties: - - V.size() == this->size() - - V.begin() == this->begin() - - V.end() == this->end() - !*/ - - void swap ( - random_subset_selector& item - ); - /*! - ensures - - swaps *this and item - !*/ - - }; - - template < - typename T, - typename rand_type - > - void swap ( - random_subset_selector<T,rand_type>& a, - random_subset_selector<T,rand_type>& b - ) { a.swap(b); } - /*! - provides global swap support - !*/ - - template < - typename T, - typename rand_type - > - void serialize ( - const random_subset_selector<T,rand_type>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template < - typename T, - typename rand_type - > - void deserialize ( - random_subset_selector<T,rand_type>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - random_subset_selector<T> randomly_subsample ( - const std::vector<T,alloc>& samples, - unsigned long num - ); - /*! - ensures - - returns a random subset R such that: - - R contains a random subset of the given samples - - R.size() == min(num, samples.size()) - - R.max_size() == num - - The random number generator used by this function will always be - initialized in the same way. I.e. this function will always pick - the same random subsample if called multiple times. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc, - typename U - > - random_subset_selector<T> randomly_subsample ( - const std::vector<T,alloc>& samples, - unsigned long num, - const U& random_seed - ); - /*! - requires - - random_seed must be convertible to a string by dlib::cast_to_string() - ensures - - returns a random subset R such that: - - R contains a random subset of the given samples - - R.size() == min(num, samples.size()) - - R.max_size() == num - - The given random_seed will be used to initialize the random number - generator used by this function. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - random_subset_selector<T> randomly_subsample ( - const random_subset_selector<T>& samples, - unsigned long num - ); - /*! - ensures - - returns a random subset R such that: - - R contains a random subset of the given samples - - R.size() == min(num, samples.size()) - - R.max_size() == num - - The random number generator used by this function will always be - initialized in the same way. I.e. this function will always pick - the same random subsample if called multiple times. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename U - > - random_subset_selector<T> randomly_subsample ( - const random_subset_selector<T>& samples, - unsigned long num, - const U& random_seed - ); - /*! - requires - - random_seed must be convertible to a string by dlib::cast_to_string() - ensures - - returns a random subset R such that: - - R contains a random subset of the given samples - - R.size() == min(num, samples.size()) - - R.max_size() == num - - The given random_seed will be used to initialize the random number - generator used by this function. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - const matrix_exp mat ( - const random_subset_selector<T>& m - ); - /*! - ensures - - returns a matrix R such that: - - is_col_vector(R) == true - - R.size() == m.size() - - for all valid r: - R(r) == m[r] - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RANDOM_SUBSeT_SELECTOR_ABSTRACT_H_ - diff --git a/ml/dlib/dlib/statistics/running_gradient.h b/ml/dlib/dlib/statistics/running_gradient.h deleted file mode 100644 index d3f1b3ddf..000000000 --- a/ml/dlib/dlib/statistics/running_gradient.h +++ /dev/null @@ -1,370 +0,0 @@ -// Copyright (C) 2016 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_RuNNING_GRADIENT_Hh_ -#define DLIB_RuNNING_GRADIENT_Hh_ - -#include "running_gradient_abstract.h" -#include "../algs.h" -#include "../serialize.h" -#include <cmath> -#include "../matrix.h" -#include <algorithm> - - -namespace dlib -{ - class running_gradient - { - public: - - running_gradient ( - ) - { - clear(); - } - - void clear( - ) - { - n = 0; - R = identity_matrix<double>(2)*1e6; - w = 0; - residual_squared = 0; - } - - double current_n ( - ) const - { - return n; - } - - void add( - double y - ) - { - matrix<double,2,1> x; - x = n, 1; - - // Do recursive least squares computations - const double temp = 1 + trans(x)*R*x; - matrix<double,2,1> tmp = R*x; - R = R - (tmp*trans(tmp))/temp; - // R should always be symmetric. This line improves numeric stability of this algorithm. - R = 0.5*(R + trans(R)); - w = w + R*x*(y - trans(x)*w); - - // Also, recursively keep track of the residual error between the given value - // and what our linear predictor outputs. - residual_squared = residual_squared + std::pow((y - trans(x)*w),2.0)*temp; - - ++n; - } - - double gradient ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\t double running_gradient::gradient()" - << "\n\t You must add more values into this object before calling this function." - << "\n\t this: " << this - ); - - return w(0); - } - - double intercept ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 0, - "\t double running_gradient::intercept()" - << "\n\t You must add more values into this object before calling this function." - << "\n\t this: " << this - ); - - return w(1); - } - double standard_error ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 2, - "\t double running_gradient::standard_error()" - << "\n\t You must add more values into this object before calling this function." - << "\n\t this: " << this - ); - - - const double s = residual_squared/(n-2); - const double adjust = 12.0/(std::pow(current_n(),3.0) - current_n()); - return std::sqrt(s*adjust); - } - - double probability_gradient_less_than ( - double thresh - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 2, - "\t double running_gradient::probability_gradient_less_than()" - << "\n\t You must add more values into this object before calling this function." - << "\n\t this: " << this - ); - - return normal_cdf(thresh, gradient(), standard_error()); - } - - double probability_gradient_greater_than ( - double thresh - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 2, - "\t double running_gradient::probability_gradient_greater_than()" - << "\n\t You must add more values into this object before calling this function." - << "\n\t this: " << this - ); - - return 1-probability_gradient_less_than(thresh); - } - - friend void serialize (const running_gradient& item, std::ostream& out) - { - int version = 1; - serialize(version, out); - serialize(item.n, out); - serialize(item.R, out); - serialize(item.w, out); - serialize(item.residual_squared, out); - } - - friend void deserialize (running_gradient& item, std::istream& in) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw serialization_error("Unexpected version found while deserializing dlib::running_gradient."); - deserialize(item.n, in); - deserialize(item.R, in); - deserialize(item.w, in); - deserialize(item.residual_squared, in); - } - - private: - - static double normal_cdf(double value, double mean, double stddev) - { - if (stddev == 0) - { - if (value < mean) - return 0; - else if (value > mean) - return 1; - else - return 0.5; - } - value = (value-mean)/stddev; - return 0.5 * std::erfc(-value / std::sqrt(2.0)); - } - - double n; - matrix<double,2,2> R; - matrix<double,2,1> w; - double residual_squared; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - double probability_gradient_less_than ( - const T& container, - double thresh - ) - { - running_gradient g; - for(auto&& v : container) - g.add(v); - - // make sure requires clause is not broken - DLIB_ASSERT(g.current_n() > 2, - "\t double probability_gradient_less_than()" - << "\n\t You need more than 2 elements in the given container to call this function." - ); - return g.probability_gradient_less_than(thresh); - } - - template < - typename T - > - double probability_gradient_greater_than ( - const T& container, - double thresh - ) - { - running_gradient g; - for(auto&& v : container) - g.add(v); - - // make sure requires clause is not broken - DLIB_ASSERT(g.current_n() > 2, - "\t double probability_gradient_greater_than()" - << "\n\t You need more than 2 elements in the given container to call this function." - ); - return g.probability_gradient_greater_than(thresh); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - double find_upper_quantile ( - const T& container_, - double quantile - ) - { - DLIB_CASSERT(0 <= quantile && quantile <= 1.0); - - // copy container into a std::vector - std::vector<double> container(container_.begin(), container_.end()); - - DLIB_CASSERT(container.size() > 0); - - size_t idx_upper = std::round((container.size()-1)*(1-quantile)); - - std::nth_element(container.begin(), container.begin()+idx_upper, container.end()); - auto upper_q = *(container.begin()+idx_upper); - return upper_q; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - size_t count_steps_without_decrease ( - const T& container, - double probability_of_decrease = 0.51 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1, - "\t size_t count_steps_without_decrease()" - << "\n\t probability_of_decrease: "<< probability_of_decrease - ); - - running_gradient g; - size_t count = 0; - size_t j = 0; - for (auto i = container.rbegin(); i != container.rend(); ++i) - { - ++j; - g.add(*i); - if (g.current_n() > 2) - { - // Note that this only looks backwards because we are looping over the - // container backwards. So here we are really checking if the gradient isn't - // decreasing. - double prob_decreasing = g.probability_gradient_greater_than(0); - // If we aren't confident things are decreasing. - if (prob_decreasing < probability_of_decrease) - count = j; - } - } - return count; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - size_t count_steps_without_decrease_robust ( - const T& container, - double probability_of_decrease = 0.51, - double quantile_discard = 0.10 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0 <= quantile_discard && quantile_discard <= 1); - DLIB_ASSERT(0.5 < probability_of_decrease && probability_of_decrease < 1, - "\t size_t count_steps_without_decrease_robust()" - << "\n\t probability_of_decrease: "<< probability_of_decrease - ); - - if (container.size() == 0) - return 0; - - const auto quantile_thresh = find_upper_quantile(container, quantile_discard); - - running_gradient g; - size_t count = 0; - size_t j = 0; - for (auto i = container.rbegin(); i != container.rend(); ++i) - { - ++j; - // ignore values that are too large - if (*i <= quantile_thresh) - g.add(*i); - - if (g.current_n() > 2) - { - // Note that this only looks backwards because we are looping over the - // container backwards. So here we are really checking if the gradient isn't - // decreasing. - double prob_decreasing = g.probability_gradient_greater_than(0); - // If we aren't confident things are decreasing. - if (prob_decreasing < probability_of_decrease) - count = j; - } - } - return count; - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - size_t count_steps_without_increase ( - const T& container, - double probability_of_increase = 0.51 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(0.5 < probability_of_increase && probability_of_increase < 1, - "\t size_t count_steps_without_increase()" - << "\n\t probability_of_increase: "<< probability_of_increase - ); - - running_gradient g; - size_t count = 0; - size_t j = 0; - for (auto i = container.rbegin(); i != container.rend(); ++i) - { - ++j; - g.add(*i); - if (g.current_n() > 2) - { - // Note that this only looks backwards because we are looping over the - // container backwards. So here we are really checking if the gradient isn't - // increasing. - double prob_increasing = g.probability_gradient_less_than(0); - // If we aren't confident things are increasing. - if (prob_increasing < probability_of_increase) - count = j; - } - } - return count; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RuNNING_GRADIENT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/running_gradient_abstract.h b/ml/dlib/dlib/statistics/running_gradient_abstract.h deleted file mode 100644 index a42e1c152..000000000 --- a/ml/dlib/dlib/statistics/running_gradient_abstract.h +++ /dev/null @@ -1,276 +0,0 @@ -// Copyright (C) 2016 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_ -#ifdef DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_ - - -namespace dlib -{ - class running_gradient - { - /*! - WHAT THIS OBJECT REPRESENTS - This object is a tool for estimating if a noisy sequence of numbers is - trending up or down and by how much. It does this by finding the least - squares fit of a line to the data and then allows you to perform a - statistical test on the slope of that line. - !*/ - - public: - - running_gradient ( - ); - /*! - ensures - - #current_n() == 0 - !*/ - - void clear( - ); - /*! - ensures - - #current_n() == 0 - - this object has its initial value - - clears all memory of any previous data points - !*/ - - double current_n ( - ) const; - /*! - ensures - - returns the number of values given to this object by add(). - !*/ - - void add( - double y - ); - /*! - ensures - - Updates the gradient() and standard_error() estimates in this object - based on the new y value. - - #current_n() == current_n() + 1 - !*/ - - double gradient ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - If we consider the values given to add() as time series data, we can - estimate the rate-of-change of those values. That is, how much, - typically, do those values change from sample to sample? The gradient() - function returns the current estimate. It does this by finding the least - squares fit of a line to the data given to add() and returning the slope - of this line. - !*/ - - double intercept ( - ) const; - /*! - requires - - current_n() > 0 - ensures - - This class fits a line to the time series data given to add(). This - function returns the intercept of that line while gradient() returns the - slope of that line. This means that, for example, the next point that - add() will see, as predicted by this best fit line, is the value - intercept() + current_n()*gradient(). - !*/ - - double standard_error ( - ) const; - /*! - requires - - current_n() > 2 - ensures - - returns the standard deviation of the estimate of gradient(). - !*/ - - double probability_gradient_less_than ( - double thresh - ) const; - /*! - requires - - current_n() > 2 - ensures - - If we can assume the values given to add() are linearly related to each - other and corrupted by Gaussian additive noise then our estimate of - gradient() is a random variable with a mean value of gradient() and a - standard deviation of standard_error(). This lets us compute the - probability that the true gradient of the data is less than thresh, which - is what this function returns. - !*/ - - double probability_gradient_greater_than ( - double thresh - ) const; - /*! - requires - - current_n() > 2 - ensures - - returns 1-probability_gradient_less_than(thresh) - !*/ - - }; - - void serialize ( - const running_gradient& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - void deserialize ( - running_gradient& item, - std::istream& in - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - double probability_gradient_less_than ( - const T& container, - double thresh - ); - /*! - requires - - container must be a container of double values that can be enumerated with a - range based for loop. - - The container must contain more than 2 elements. - ensures - - Puts all the elements of container into a running_gradient object, R, and - then returns R.probability_gradient_less_than(thresh). - !*/ - - template < - typename T - > - double probability_gradient_greater_than ( - const T& container, - double thresh - ); - /*! - requires - - container must be a container of double values that can be enumerated with a - range based for loop. - - The container must contain more than 2 elements. - ensures - - Puts all the elements of container into a running_gradient object, R, and - then returns R.probability_gradient_greater_than(thresh). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - size_t count_steps_without_decrease ( - const T& container, - double probability_of_decrease = 0.51 - ); - /*! - requires - - container must be a container of double values that can be enumerated with - .rbegin() and .rend(). - - 0.5 < probability_of_decrease < 1 - ensures - - If you think of the contents of container as a potentially noisy time series, - then this function returns a count of how long the time series has gone - without noticeably decreasing in value. It does this by adding the - elements into a running_gradient object and counting how many elements, - starting with container.back(), that you need to examine before you are - confident that the series has been decreasing in value. Here, "confident of - decrease" means that the probability of decrease is >= probability_of_decrease. - - Setting probability_of_decrease to 0.51 means we count until we see even a - small hint of decrease, whereas a larger value of 0.99 would return a larger - count since it keeps going until it is nearly certain the time series is - decreasing. - - The max possible output from this function is container.size(). - !*/ - - template < - typename T - > - size_t count_steps_without_decrease_robust ( - const T& container, - double probability_of_decrease = 0.51, - double quantile_discard = 0.10 - ); - /*! - requires - - container must be a container of double values that can be enumerated with - .begin() and .end() as well as .rbegin() and .rend(). - - 0.5 < probability_of_decrease < 1 - - 0 <= quantile_discard <= 1 - ensures - - This function behaves just like - count_steps_without_decrease(container,probability_of_decrease) except that - it ignores values in container that are in the upper quantile_discard - quantile. So for example, if the quantile discard is 0.1 then the 10% - largest values in container are ignored. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - size_t count_steps_without_increase ( - const T& container, - double probability_of_increase = 0.51 - ); - /*! - requires - - container must be a container of double values that can be enumerated with - .rbegin() and .rend(). - - 0.5 < probability_of_increase < 1 - ensures - - If you think of the contents of container as a potentially noisy time series, - then this function returns a count of how long the time series has gone - without noticeably increasing in value. It does this by adding the - elements into a running_gradient object and counting how many elements, - starting with container.back(), that you need to examine before you are - confident that the series has been increasing in value. Here, "confident of - increase" means that the probability of increase is >= probability_of_increase. - - Setting probability_of_increase to 0.51 means we count until we see even a - small hint of increase, whereas a larger value of 0.99 would return a larger - count since it keeps going until it is nearly certain the time series is - increasing. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - double find_upper_quantile ( - const T& container, - double quantile - ); - /*! - requires - - container must be a container of double values that can be enumerated with - .begin() and .end(). - - 0 <= quantile <= 1 - - container.size() > 0 - ensures - - Finds and returns the value such that quantile percent of the values in - container are greater than it. For example, 0.5 would find the median value - in container while 0.1 would find the value that lower bounded the 10% - largest values in container. - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_RuNNING_GRADIENT_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/sammon.h b/ml/dlib/dlib/statistics/sammon.h deleted file mode 100644 index 1a3eb72a1..000000000 --- a/ml/dlib/dlib/statistics/sammon.h +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_SAMMoN_Hh_ -#define DLIB_SAMMoN_Hh_ - -#include "sammon_abstract.h" -#include "../matrix.h" -#include "../algs.h" -#include "dpca.h" -#include <vector> - -namespace dlib -{ - - class sammon_projection - { - - public: - - // ------------------------------------------------------------------------------------ - - template <typename matrix_type> - std::vector<matrix<double,0,1> > operator() ( - const std::vector<matrix_type>& data, - const long num_dims - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(num_dims > 0, - "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" - << "\n\t Invalid inputs were given to this function." - << "\n\t num_dims: " << num_dims - ); - std::vector<matrix<double,0,1> > result; // projections - if (data.size() == 0) - { - return result; - } - -#ifdef ENABLE_ASSERTS - DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(), - "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" - << "\n\t Invalid inputs were given to this function." - << "\n\t data.size(): " << data.size() - << "\n\t num_dims: " << num_dims - << "\n\t data[0].size(): " << data[0].size() - ); - for (unsigned long i = 0; i < data.size(); ++i) - { - DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(), - "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" - << "\n\t Invalid inputs were given to this function." - << "\n\t data["<<i<<"].size(): " << data[i].size() - << "\n\t data[0].size(): " << data[0].size() - << "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i]) - ); - } -#endif - - double err; // error (discarded) - do_sammon_projection(data, num_dims, result, err); - return result; - } - - // ------------------------------------------------------------------------------------ - - template <typename matrix_type> - void operator() ( - const std::vector<matrix_type>& data, - const long num_dims, - std::vector<matrix<double,0,1> >& result, - double &err, - const unsigned long num_iters = 1000, - const double err_delta = 1.0e-9 - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(num_dims > 0 && num_iters > 0 && err_delta > 0.0, - "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" - << "\n\t Invalid inputs were given to this function." - << "\n\t data.size(): " << data.size() - << "\n\t num_dims: " << num_dims - << "\n\t num_iters: " << num_iters - << "\n\t err_delta: " << err_delta - ); - if (data.size() == 0) - { - result.clear(); - err = 0; - return; - } - -#ifdef ENABLE_ASSERTS - DLIB_ASSERT(0 < num_dims && num_dims <= data[0].size(), - "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" - << "\n\t Invalid inputs were given to this function." - << "\n\t data.size(): " << data.size() - << "\n\t num_dims: " << num_dims - << "\n\t data[0].size(): " << data[0].size() - ); - for (unsigned long i = 0; i < data.size(); ++i) - { - DLIB_ASSERT(is_col_vector(data[i]) && data[i].size() == data[0].size(), - "\t std::vector<matrix<double,0,1> > sammon_projection::operator()" - << "\n\t Invalid inputs were given to this function." - << "\n\t data["<<i<<"].size(): " << data[i].size() - << "\n\t data[0].size(): " << data[0].size() - << "\n\t is_col_vector(data["<<i<<"]): " << is_col_vector(data[i]) - ); - } -#endif - - do_sammon_projection(data, num_dims, result, err, num_iters, err_delta); - } - - // ---------------------------------------------------------------------------------------- - // ---------------------------------------------------------------------------------------- - - private: - - void compute_relative_distances( - matrix<double,0,1>& dist, // relative distances (output) - matrix<double,0,0>& data, // input data (matrix whose columns are the input vectors) - double eps_ratio = 1.0e-7 // to compute the minimum distance eps - ) - /*! - requires - - dist.nc() == comb( data.nc(), 2 ), preallocated - - eps_ratio > 0 - ensures - - dist[k] == lenght(data[i] - data[j]) for k = j(j-1)/2 + i - !*/ - { - const long N = data.nc(); // num of points - double eps; // minimum distance, forced to avoid vectors collision - // computed at runtime as eps_ration * mean(vectors distances) - for (int k = 0, i = 1; i < N; ++i) - for (int j = 0; j < i; ++j) - dist(k++) = length(colm(data, i) - colm(data, j)); - - eps = eps_ratio * mean(dist); - dist = lowerbound(dist, eps); - } - - // ---------------------------------------------------------------------------------------- - - template <typename matrix_type> - void do_sammon_projection( - const std::vector<matrix_type>& data, // input data - unsigned long num_dims, // dimension of the reduced space - std::vector<matrix<double,0,1> >& result, // projections (output) - double &err, // error (output) - unsigned long num_iters = 1000, // max num of iterations: stop condition - const double err_delta = 1.0e-9 // delta error: stop condition - ) - /*! - requires - - matrix_type should be a kind of dlib::matrix<double,N,1> - - num_dims > 0 - - num_iters > 0 - - err_delta > 0 - ensures - - result == a set of matrix<double,num_dims,1> objects that represent - the Sammon's projections of data vectors. - - err == the estimated error done in the projection, with the extra - property that err(at previous iteration) - err < err_delta - !*/ - { - // other params - const double mf = 0.3; // magic factor - - matrix<double> mdata; // input data as matrix - matrix<double> projs; // projected vectors, i.e. output data as matrix - - // std::vector<matrix> -> matrix - mdata.set_size(data[0].size(), data.size()); - for (unsigned int i = 0; i < data.size(); i++) - set_colm(mdata, i) = data[i]; - - const long N = mdata.nc(); // num of points - const long d = num_dims; // size of the reduced space - const long nd = N * (N - 1) / 2; // num of pairs of points = size of the distances vectors - - matrix<double, 0, 1> dsij, inv_dsij; // d*_ij: pair-wise distances in the input space (and inverses) - dsij.set_size(nd, 1); - inv_dsij.set_size(nd, 1); - double ic; // 1.0 / sum of dsij - - matrix<double, 0, 1> dij; // d_ij: pair-wise distances in the reduced space - dij.set_size(nd, 1); - - matrix<double, 0, 0> dE, dE2, dtemp; // matrices representing error partial derivatives - dE.set_size(d, N); - dE2.set_size(d, N); - dtemp.set_size(d, N); - - matrix<double, 0, 1> inv_dij, alpha; // utility vectors used to compute the partial derivatives - inv_dij.set_size(N, 1); // inv_dij is 1.0/dij, but we only need it column-wise - alpha.set_size(N, 1); // (slightly wasting a bit of computation) - // alpha = 1.0/dij - 1.0/dsij, again column-wise - - - // initialize projs with PCA - discriminant_pca<matrix<double> > dpca; - for (int i = 0; i < mdata.nc(); ++i) - { - dpca.add_to_total_variance(colm(mdata, i)); - } - matrix<double> mat = dpca.dpca_matrix_of_size(num_dims); - projs = mat * mdata; - - // compute dsij, inv_dsij and ic - compute_relative_distances(dsij, mdata); - inv_dsij = 1.0 / dsij; - ic = 1.0 / sum(dsij); - - // compute dij and err - compute_relative_distances(dij, projs); - err = ic * sum(pointwise_multiply(squared(dij - dsij), inv_dsij)); - - // start iterating - while (num_iters--) - { - // compute dE, dE2 progressively column by column - for (int p = 0; p < N; ++p) - { - // compute - // - alpha_p, the column vector with 1/d_pj - 1/d*_pj - // - dtemp, the matrix with the p-th column repeated all along - //TODO: optimize constructions - for (int i = 0; i < N; ++i) - { - int pos = (i < p) ? p * (p - 1) / 2 + i : i * (i - 1) / 2 + p; - inv_dij(i) = (i == p) ? 0.0 : 1.0 / dij(pos); - alpha(i) = (i == p) ? 0.0 : inv_dij(i) - inv_dsij(pos); - set_colm(dtemp, i) = colm(projs, p); - } - - dtemp -= projs; - set_colm(dE, p) = dtemp * alpha; - - double sum_alpha = sum(alpha); - set_colm(dE2, p) = abs( sum_alpha + squared(dtemp) * cubed(inv_dij) ); - } - - - // compute the update projections - projs += pointwise_multiply(dE, mf * reciprocal(dE2)); - - // compute new dij and error - compute_relative_distances(dij, projs); - double err_new = ic * sum( pointwise_multiply(squared(dij - dsij), inv_dsij) ); - if (err - err_new < err_delta) - break; - err = err_new; - } - - // matrix -> std::vector<matrix> - result.clear(); - for (int i = 0; i < projs.nc(); ++i) - result.push_back(colm(projs, i)); - } - - }; - -} // namespace dlib - -#endif // DLIB_SAMMoN_Hh_ - diff --git a/ml/dlib/dlib/statistics/sammon_abstract.h b/ml/dlib/dlib/statistics/sammon_abstract.h deleted file mode 100644 index 0e009729c..000000000 --- a/ml/dlib/dlib/statistics/sammon_abstract.h +++ /dev/null @@ -1,117 +0,0 @@ -// Copyright (C) 2012 Emanuele Cesena (emanuele.cesena@gmail.com), Davis E. King -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_SAMMoN_ABSTRACT_Hh_ -#ifdef DLIB_SAMMoN_ABSTRACT_Hh_ - -#include "../matrix/matrix_abstract.h" -#include <vector> - -namespace dlib -{ - - class sammon_projection - { - /*! - WHAT THIS OBJECT REPRESENTS - This is a function object that computes the Sammon projection of a set - of N points in a L-dimensional vector space onto a d-dimensional space - (d < L), according to the paper: - A Nonlinear Mapping for Data Structure Analysis (1969) by J.W. Sammon - - The current implementation is a vectorized version of the original algorithm. - !*/ - - public: - - sammon_projection( - ); - /*! - ensures - - this object is properly initialized - !*/ - - template <typename matrix_type> - std::vector<matrix<double,0,1> > operator() ( - const std::vector<matrix_type>& data, - const long num_dims - ); - /*! - requires - - num_dims > 0 - - matrix_type should be a kind of dlib::matrix of doubles capable - of representing column vectors. - - for all valid i: - - is_col_vector(data[i]) == true - - data[0].size() == data[i].size() - (i.e. all the vectors in data must have the same dimensionality) - - if (data.size() != 0) then - - 0 < num_dims <= data[0].size() - (i.e. you can't project into a higher dimension than the input data, - only to a lower dimension.) - ensures - - This routine computes Sammon's dimensionality reduction method based on the - given input data. It will attempt to project the contents of data into a - num_dims dimensional space that preserves relative distances between the - input data points. - - This function returns a std::vector, OUT, such that: - - OUT == a set of column vectors that represent the Sammon projection of - the input data vectors. - - OUT.size() == data.size() - - for all valid i: - - OUT[i].size() == num_dims - - OUT[i] == the Sammon projection of the input vector data[i] - !*/ - - template <typename matrix_type> - void operator() ( - const std::vector<matrix_type>& data, - const long num_dims, - std::vector<matrix<double,0,1> >& result, - double &err, - const unsigned long num_iters = 1000, - const double err_delta = 1.0e-9 - ); - /*! - requires - - num_iters > 0 - - err_delta > 0 - - num_dims > 0 - - matrix_type should be a kind of dlib::matrix of doubles capable - of representing column vectors. - - for all valid i: - - is_col_vector(data[i]) == true - - data[0].size() == data[i].size() - (i.e. all the vectors in data must have the same dimensionality) - - if (data.size() != 0) then - - 0 < num_dims <= data[0].size() - (i.e. you can't project into a higher dimension than the input data, - only to a lower dimension.) - ensures - - This routine computes Sammon's dimensionality reduction method based on the - given input data. It will attempt to project the contents of data into a - num_dims dimensional space that preserves relative distances between the - input data points. - - #err == the final error value at the end of the algorithm. The goal of Sammon's - algorithm is to find a lower dimensional projection of the input data that - preserves the relative distances between points. The value in #err is a measure - of the total error at the end of the algorithm. So smaller values indicate - a better projection was found than if a large value is returned via #err. - - Sammon's algorithm will run until either num_iters iterations has executed - or the change in error from one iteration to the next is less than err_delta. - - Upon completion, the output of Sammon's projection is stored into #result, in - particular, we will have: - - #result == a set of column vectors that represent the Sammon projection of - the input data vectors. - - #result.size() == data.size() - - for all valid i: - - #result[i].size() == num_dims - - #result[i] == the Sammon projection of the input vector data[i] - !*/ - - }; - -} - -#endif // DLIB_SAMMoN_ABSTRACT_Hh_ - - diff --git a/ml/dlib/dlib/statistics/statistics.h b/ml/dlib/dlib/statistics/statistics.h deleted file mode 100644 index 9dee7006b..000000000 --- a/ml/dlib/dlib/statistics/statistics.h +++ /dev/null @@ -1,1890 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_STATISTICs_ -#define DLIB_STATISTICs_ - -#include "statistics_abstract.h" -#include <limits> -#include <cmath> -#include "../algs.h" -#include "../matrix.h" -#include "../sparse_vector.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_stats - { - public: - - running_stats() - { - clear(); - - COMPILE_TIME_ASSERT (( - is_same_type<float,T>::value || - is_same_type<double,T>::value || - is_same_type<long double,T>::value - )); - } - - void clear() - { - sum = 0; - sum_sqr = 0; - sum_cub = 0; - sum_four = 0; - - n = 0; - min_value = std::numeric_limits<T>::infinity(); - max_value = -std::numeric_limits<T>::infinity(); - } - - void add ( - const T& val - ) - { - sum += val; - sum_sqr += val*val; - sum_cub += cubed(val); - sum_four += quaded(val); - - if (val < min_value) - min_value = val; - if (val > max_value) - max_value = val; - - ++n; - } - - T current_n ( - ) const - { - return n; - } - - T mean ( - ) const - { - if (n != 0) - return sum/n; - else - return 0; - } - - T max ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 0, - "\tT running_stats::max" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return max_value; - } - - T min ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 0, - "\tT running_stats::min" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return min_value; - } - - T variance ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_stats::variance" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/(n-1); - temp = temp*(sum_sqr - sum*sum/n); - // make sure the variance is never negative. This might - // happen due to numerical errors. - if (temp >= 0) - return temp; - else - return 0; - } - - T stddev ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_stats::stddev" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return std::sqrt(variance()); - } - - T skewness ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 2, - "\tT running_stats::skewness" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/n; - T temp1 = std::sqrt(n*(n-1))/(n-2); - temp = temp1*temp*(sum_cub - 3*sum_sqr*sum*temp + 2*cubed(sum)*temp*temp)/ - (std::sqrt(std::pow(temp*(sum_sqr-sum*sum*temp),3))); - - return temp; - } - - T ex_kurtosis ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 3, - "\tT running_stats::kurtosis" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/n; - T m4 = temp*(sum_four - 4*sum_cub*sum*temp+6*sum_sqr*sum*sum*temp*temp - -3*quaded(sum)*cubed(temp)); - T m2 = temp*(sum_sqr-sum*sum*temp); - temp = (n-1)*((n+1)*m4/(m2*m2)-3*(n-1))/((n-2)*(n-3)); - - return temp; - } - - T scale ( - const T& val - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_stats::variance" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - return (val-mean())/std::sqrt(variance()); - } - - running_stats operator+ ( - const running_stats& rhs - ) const - { - running_stats temp(*this); - - temp.sum += rhs.sum; - temp.sum_sqr += rhs.sum_sqr; - temp.sum_cub += rhs.sum_cub; - temp.sum_four += rhs.sum_four; - temp.n += rhs.n; - temp.min_value = std::min(rhs.min_value, min_value); - temp.max_value = std::max(rhs.max_value, max_value); - return temp; - } - - template <typename U> - friend void serialize ( - const running_stats<U>& item, - std::ostream& out - ); - - template <typename U> - friend void deserialize ( - running_stats<U>& item, - std::istream& in - ); - - private: - T sum; - T sum_sqr; - T sum_cub; - T sum_four; - T n; - T min_value; - T max_value; - - T cubed (const T& val) const {return val*val*val; } - T quaded (const T& val) const {return val*val*val*val; } - }; - - template <typename T> - void serialize ( - const running_stats<T>& item, - std::ostream& out - ) - { - int version = 2; - serialize(version, out); - - serialize(item.sum, out); - serialize(item.sum_sqr, out); - serialize(item.sum_cub, out); - serialize(item.sum_four, out); - serialize(item.n, out); - serialize(item.min_value, out); - serialize(item.max_value, out); - } - - template <typename T> - void deserialize ( - running_stats<T>& item, - std::istream& in - ) - { - int version = 0; - deserialize(version, in); - if (version != 2) - throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats object."); - - deserialize(item.sum, in); - deserialize(item.sum_sqr, in); - deserialize(item.sum_cub, in); - deserialize(item.sum_four, in); - deserialize(item.n, in); - deserialize(item.min_value, in); - deserialize(item.max_value, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_scalar_covariance - { - public: - - running_scalar_covariance() - { - clear(); - - COMPILE_TIME_ASSERT (( - is_same_type<float,T>::value || - is_same_type<double,T>::value || - is_same_type<long double,T>::value - )); - } - - void clear() - { - sum_xy = 0; - sum_x = 0; - sum_y = 0; - sum_xx = 0; - sum_yy = 0; - n = 0; - } - - void add ( - const T& x, - const T& y - ) - { - sum_xy += x*y; - - sum_xx += x*x; - sum_yy += y*y; - - sum_x += x; - sum_y += y; - - n += 1; - } - - T current_n ( - ) const - { - return n; - } - - T mean_x ( - ) const - { - if (n != 0) - return sum_x/n; - else - return 0; - } - - T mean_y ( - ) const - { - if (n != 0) - return sum_y/n; - else - return 0; - } - - T covariance ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance::covariance()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return 1/(n-1) * (sum_xy - sum_y*sum_x/n); - } - - T correlation ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance::correlation()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return covariance() / std::sqrt(variance_x()*variance_y()); - } - - T variance_x ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance::variance_x()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); - // make sure the variance is never negative. This might - // happen due to numerical errors. - if (temp >= 0) - return temp; - else - return 0; - } - - T variance_y ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance::variance_y()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n); - // make sure the variance is never negative. This might - // happen due to numerical errors. - if (temp >= 0) - return temp; - else - return 0; - } - - T stddev_x ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance::stddev_x()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return std::sqrt(variance_x()); - } - - T stddev_y ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance::stddev_y()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return std::sqrt(variance_y()); - } - - running_scalar_covariance operator+ ( - const running_scalar_covariance& rhs - ) const - { - running_scalar_covariance temp(rhs); - - temp.sum_xy += sum_xy; - temp.sum_x += sum_x; - temp.sum_y += sum_y; - temp.sum_xx += sum_xx; - temp.sum_yy += sum_yy; - temp.n += n; - return temp; - } - - private: - - T sum_xy; - T sum_x; - T sum_y; - T sum_xx; - T sum_yy; - T n; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_scalar_covariance_decayed - { - public: - - explicit running_scalar_covariance_decayed( - T decay_halflife = 1000 - ) - { - DLIB_ASSERT(decay_halflife > 0); - - sum_xy = 0; - sum_x = 0; - sum_y = 0; - sum_xx = 0; - sum_yy = 0; - forget = std::pow(0.5, 1/decay_halflife); - n = 0; - - COMPILE_TIME_ASSERT (( - is_same_type<float,T>::value || - is_same_type<double,T>::value || - is_same_type<long double,T>::value - )); - } - - T forget_factor ( - ) const - { - return forget; - } - - void add ( - const T& x, - const T& y - ) - { - sum_xy = sum_xy*forget + x*y; - - sum_xx = sum_xx*forget + x*x; - sum_yy = sum_yy*forget + y*y; - - sum_x = sum_x*forget + x; - sum_y = sum_y*forget + y; - - n = n*forget + 1; - } - - T current_n ( - ) const - { - return n; - } - - T mean_x ( - ) const - { - if (n != 0) - return sum_x/n; - else - return 0; - } - - T mean_y ( - ) const - { - if (n != 0) - return sum_y/n; - else - return 0; - } - - T covariance ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance_decayed::covariance()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return 1/(n-1) * (sum_xy - sum_y*sum_x/n); - } - - T correlation ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance_decayed::correlation()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = std::sqrt(variance_x()*variance_y()); - if (temp != 0) - return covariance() / temp; - else - return 0; // just say it's zero if there isn't any variance in x or y. - } - - T variance_x ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance_decayed::variance_x()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); - // make sure the variance is never negative. This might - // happen due to numerical errors. - if (temp >= 0) - return temp; - else - return 0; - } - - T variance_y ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance_decayed::variance_y()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/(n-1) * (sum_yy - sum_y*sum_y/n); - // make sure the variance is never negative. This might - // happen due to numerical errors. - if (temp >= 0) - return temp; - else - return 0; - } - - T stddev_x ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance_decayed::stddev_x()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return std::sqrt(variance_x()); - } - - T stddev_y ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_scalar_covariance_decayed::stddev_y()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return std::sqrt(variance_y()); - } - - private: - - T sum_xy; - T sum_x; - T sum_y; - T sum_xx; - T sum_yy; - T n; - T forget; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_stats_decayed - { - public: - - explicit running_stats_decayed( - T decay_halflife = 1000 - ) - { - DLIB_ASSERT(decay_halflife > 0); - - sum_x = 0; - sum_xx = 0; - forget = std::pow(0.5, 1/decay_halflife); - n = 0; - - COMPILE_TIME_ASSERT (( - is_same_type<float,T>::value || - is_same_type<double,T>::value || - is_same_type<long double,T>::value - )); - } - - T forget_factor ( - ) const - { - return forget; - } - - void add ( - const T& x - ) - { - - sum_xx = sum_xx*forget + x*x; - - sum_x = sum_x*forget + x; - - n = n*forget + 1; - } - - T current_n ( - ) const - { - return n; - } - - T mean ( - ) const - { - if (n != 0) - return sum_x/n; - else - return 0; - } - - T variance ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_stats_decayed::variance()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - T temp = 1/(n-1) * (sum_xx - sum_x*sum_x/n); - // make sure the variance is never negative. This might - // happen due to numerical errors. - if (temp >= 0) - return temp; - else - return 0; - } - - T stddev ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(current_n() > 1, - "\tT running_stats_decayed::stddev()" - << "\n\tyou have to add some numbers to this object first" - << "\n\tthis: " << this - ); - - return std::sqrt(variance()); - } - - template <typename U> - friend void serialize ( - const running_stats_decayed<U>& item, - std::ostream& out - ); - - template <typename U> - friend void deserialize ( - running_stats_decayed<U>& item, - std::istream& in - ); - - private: - - T sum_x; - T sum_xx; - T n; - T forget; - }; - - template <typename T> - void serialize ( - const running_stats_decayed<T>& item, - std::ostream& out - ) - { - int version = 1; - serialize(version, out); - - serialize(item.sum_x, out); - serialize(item.sum_xx, out); - serialize(item.n, out); - serialize(item.forget, out); - } - - template <typename T> - void deserialize ( - running_stats_decayed<T>& item, - std::istream& in - ) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw dlib::serialization_error("Unexpected version number found while deserializing dlib::running_stats_decayed object."); - - deserialize(item.sum_x, in); - deserialize(item.sum_xx, in); - deserialize(item.n, in); - deserialize(item.forget, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double mean_sign_agreement ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(a.size() == b.size(), - "\t double mean_sign_agreement(a,b)" - << "\n\t a and b must be the same length." - << "\n\t a.size(): " << a.size() - << "\n\t b.size(): " << b.size() - ); - - - double temp = 0; - for (unsigned long i = 0; i < a.size(); ++i) - { - if ((a[i] >= 0 && b[i] >= 0) || - (a[i] < 0 && b[i] < 0)) - { - temp += 1; - } - } - - return temp/a.size(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double correlation ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(a.size() == b.size() && a.size() > 1, - "\t double correlation(a,b)" - << "\n\t a and b must be the same length and have more than one element." - << "\n\t a.size(): " << a.size() - << "\n\t b.size(): " << b.size() - ); - - running_scalar_covariance<double> rs; - for (unsigned long i = 0; i < a.size(); ++i) - { - rs.add(a[i], b[i]); - } - return rs.correlation(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double covariance ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(a.size() == b.size() && a.size() > 1, - "\t double covariance(a,b)" - << "\n\t a and b must be the same length and have more than one element." - << "\n\t a.size(): " << a.size() - << "\n\t b.size(): " << b.size() - ); - - running_scalar_covariance<double> rs; - for (unsigned long i = 0; i < a.size(); ++i) - { - rs.add(a[i], b[i]); - } - return rs.covariance(); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double r_squared ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(a.size() == b.size() && a.size() > 1, - "\t double r_squared(a,b)" - << "\n\t a and b must be the same length and have more than one element." - << "\n\t a.size(): " << a.size() - << "\n\t b.size(): " << b.size() - ); - - return std::pow(correlation(a,b),2.0); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double mean_squared_error ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(a.size() == b.size(), - "\t double mean_squared_error(a,b)" - << "\n\t a and b must be the same length." - << "\n\t a.size(): " << a.size() - << "\n\t b.size(): " << b.size() - ); - - return mean(squared(matrix_cast<double>(mat(a))-matrix_cast<double>(mat(b)))); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class running_covariance - { - /*! - INITIAL VALUE - - vect_size == 0 - - total_count == 0 - - CONVENTION - - vect_size == in_vector_size() - - total_count == current_n() - - - if (total_count != 0) - - total_sum == the sum of all vectors given to add() - - the covariance of all the elements given to add() is given - by: - - let avg == total_sum/total_count - - covariance == total_cov/total_count - avg*trans(avg) - !*/ - public: - - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef typename matrix_type::layout_type layout_type; - typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; - typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; - - running_covariance( - ) - { - clear(); - } - - void clear( - ) - { - total_count = 0; - - vect_size = 0; - - total_sum.set_size(0); - total_cov.set_size(0,0); - } - - long in_vector_size ( - ) const - { - return vect_size; - } - - long current_n ( - ) const - { - return static_cast<long>(total_count); - } - - void set_dimension ( - long size - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( size > 0, - "\t void running_covariance::set_dimension()" - << "\n\t Invalid inputs were given to this function" - << "\n\t size: " << size - << "\n\t this: " << this - ); - - clear(); - vect_size = size; - total_sum.set_size(size); - total_cov.set_size(size,size); - total_sum = 0; - total_cov = 0; - } - - template <typename T> - typename disable_if<is_matrix<T> >::type add ( - const T& val - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(((long)max_index_plus_one(val) <= in_vector_size() && in_vector_size() > 0), - "\t void running_covariance::add()" - << "\n\t Invalid inputs were given to this function" - << "\n\t max_index_plus_one(val): " << max_index_plus_one(val) - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t this: " << this - ); - - for (typename T::const_iterator i = val.begin(); i != val.end(); ++i) - { - total_sum(i->first) += i->second; - for (typename T::const_iterator j = val.begin(); j != val.end(); ++j) - { - total_cov(i->first, j->first) += i->second*j->second; - } - } - - ++total_count; - } - - template <typename T> - typename enable_if<is_matrix<T> >::type add ( - const T& val - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(val) && (in_vector_size() == 0 || val.size() == in_vector_size()), - "\t void running_covariance::add()" - << "\n\t Invalid inputs were given to this function" - << "\n\t is_col_vector(val): " << is_col_vector(val) - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t val.size(): " << val.size() - << "\n\t this: " << this - ); - - vect_size = val.size(); - if (total_count == 0) - { - total_cov = val*trans(val); - total_sum = val; - } - else - { - total_cov += val*trans(val); - total_sum += val; - } - ++total_count; - } - - const column_matrix mean ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( in_vector_size() != 0, - "\t running_covariance::mean()" - << "\n\t This object can not execute this function in its current state." - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t current_n(): " << current_n() - << "\n\t this: " << this - ); - - return total_sum/total_count; - } - - const general_matrix covariance ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( in_vector_size() != 0 && current_n() > 1, - "\t running_covariance::covariance()" - << "\n\t This object can not execute this function in its current state." - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t current_n(): " << current_n() - << "\n\t this: " << this - ); - - return (total_cov - total_sum*trans(total_sum)/total_count)/(total_count-1); - } - - const running_covariance operator+ ( - const running_covariance& item - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()), - "\t running_covariance running_covariance::operator+()" - << "\n\t The two running_covariance objects being added must have compatible parameters" - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t item.in_vector_size(): " << item.in_vector_size() - << "\n\t this: " << this - ); - - running_covariance temp(item); - - // make sure we ignore empty matrices - if (total_count != 0 && temp.total_count != 0) - { - temp.total_cov += total_cov; - temp.total_sum += total_sum; - temp.total_count += total_count; - } - else if (total_count != 0) - { - temp.total_cov = total_cov; - temp.total_sum = total_sum; - temp.total_count = total_count; - } - - return temp; - } - - - private: - - general_matrix total_cov; - column_matrix total_sum; - scalar_type total_count; - - long vect_size; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class running_cross_covariance - { - /*! - INITIAL VALUE - - x_vect_size == 0 - - y_vect_size == 0 - - total_count == 0 - - CONVENTION - - x_vect_size == x_vector_size() - - y_vect_size == y_vector_size() - - total_count == current_n() - - - if (total_count != 0) - - sum_x == the sum of all x vectors given to add() - - sum_y == the sum of all y vectors given to add() - - total_cov == sum of all x*trans(y) given to add() - !*/ - - public: - - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef typename matrix_type::layout_type layout_type; - typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; - typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; - - running_cross_covariance( - ) - { - clear(); - } - - void clear( - ) - { - total_count = 0; - - x_vect_size = 0; - y_vect_size = 0; - - sum_x.set_size(0); - sum_y.set_size(0); - total_cov.set_size(0,0); - } - - long x_vector_size ( - ) const - { - return x_vect_size; - } - - long y_vector_size ( - ) const - { - return y_vect_size; - } - - void set_dimensions ( - long x_size, - long y_size - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( x_size > 0 && y_size > 0, - "\t void running_cross_covariance::set_dimensions()" - << "\n\t Invalid inputs were given to this function" - << "\n\t x_size: " << x_size - << "\n\t y_size: " << y_size - << "\n\t this: " << this - ); - - clear(); - x_vect_size = x_size; - y_vect_size = y_size; - sum_x.set_size(x_size); - sum_y.set_size(y_size); - total_cov.set_size(x_size,y_size); - - sum_x = 0; - sum_y = 0; - total_cov = 0; - } - - long current_n ( - ) const - { - return static_cast<long>(total_count); - } - - template <typename T, typename U> - typename enable_if_c<!is_matrix<T>::value && !is_matrix<U>::value>::type add ( - const T& x, - const U& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) && - ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) , - "\t void running_cross_covariance::add()" - << "\n\t Invalid inputs were given to this function" - << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) - << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) - << "\n\t x_vector_size(): " << x_vector_size() - << "\n\t y_vector_size(): " << y_vector_size() - << "\n\t this: " << this - ); - - for (typename T::const_iterator i = x.begin(); i != x.end(); ++i) - { - sum_x(i->first) += i->second; - for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) - { - total_cov(i->first, j->first) += i->second*j->second; - } - } - - // do sum_y += y - for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) - { - sum_y(j->first) += j->second; - } - - ++total_count; - } - - template <typename T, typename U> - typename enable_if_c<is_matrix<T>::value && !is_matrix<U>::value>::type add ( - const T& x, - const U& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( (is_col_vector(x) && x.size() == x_vector_size() && x_vector_size() > 0) && - ((long)max_index_plus_one(y) <= y_vector_size() && y_vector_size() > 0) , - "\t void running_cross_covariance::add()" - << "\n\t Invalid inputs were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t x.size(): " << x.size() - << "\n\t max_index_plus_one(y): " << max_index_plus_one(y) - << "\n\t x_vector_size(): " << x_vector_size() - << "\n\t y_vector_size(): " << y_vector_size() - << "\n\t this: " << this - ); - - sum_x += x; - - for (long i = 0; i < x.size(); ++i) - { - for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) - { - total_cov(i, j->first) += x(i)*j->second; - } - } - - // do sum_y += y - for (typename U::const_iterator j = y.begin(); j != y.end(); ++j) - { - sum_y(j->first) += j->second; - } - - ++total_count; - } - - template <typename T, typename U> - typename enable_if_c<!is_matrix<T>::value && is_matrix<U>::value>::type add ( - const T& x, - const U& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT( ((long)max_index_plus_one(x) <= x_vector_size() && x_vector_size() > 0) && - (is_col_vector(y) && y.size() == (long)y_vector_size() && y_vector_size() > 0) , - "\t void running_cross_covariance::add()" - << "\n\t Invalid inputs were given to this function" - << "\n\t max_index_plus_one(x): " << max_index_plus_one(x) - << "\n\t is_col_vector(y): " << is_col_vector(y) - << "\n\t y.size(): " << y.size() - << "\n\t x_vector_size(): " << x_vector_size() - << "\n\t y_vector_size(): " << y_vector_size() - << "\n\t this: " << this - ); - - for (typename T::const_iterator i = x.begin(); i != x.end(); ++i) - { - sum_x(i->first) += i->second; - for (long j = 0; j < y.size(); ++j) - { - total_cov(i->first, j) += i->second*y(j); - } - } - - sum_y += y; - - ++total_count; - } - - template <typename T, typename U> - typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value>::type add ( - const T& x, - const U& y - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(is_col_vector(x) && (x_vector_size() == 0 || x.size() == x_vector_size()) && - is_col_vector(y) && (y_vector_size() == 0 || y.size() == y_vector_size()) && - x.size() != 0 && - y.size() != 0, - "\t void running_cross_covariance::add()" - << "\n\t Invalid inputs were given to this function" - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t x_vector_size(): " << x_vector_size() - << "\n\t x.size(): " << x.size() - << "\n\t is_col_vector(y): " << is_col_vector(y) - << "\n\t y_vector_size(): " << y_vector_size() - << "\n\t y.size(): " << y.size() - << "\n\t this: " << this - ); - - x_vect_size = x.size(); - y_vect_size = y.size(); - if (total_count == 0) - { - total_cov = x*trans(y); - sum_x = x; - sum_y = y; - } - else - { - total_cov += x*trans(y); - sum_x += x; - sum_y += y; - } - ++total_count; - } - - const column_matrix mean_x ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( current_n() != 0, - "\t running_cross_covariance::mean()" - << "\n\t This object can not execute this function in its current state." - << "\n\t current_n(): " << current_n() - << "\n\t this: " << this - ); - - return sum_x/total_count; - } - - const column_matrix mean_y ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( current_n() != 0, - "\t running_cross_covariance::mean()" - << "\n\t This object can not execute this function in its current state." - << "\n\t current_n(): " << current_n() - << "\n\t this: " << this - ); - - return sum_y/total_count; - } - - const general_matrix covariance_xy ( - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT( current_n() > 1, - "\t running_cross_covariance::covariance()" - << "\n\t This object can not execute this function in its current state." - << "\n\t x_vector_size(): " << x_vector_size() - << "\n\t y_vector_size(): " << y_vector_size() - << "\n\t current_n(): " << current_n() - << "\n\t this: " << this - ); - - return (total_cov - sum_x*trans(sum_y)/total_count)/(total_count-1); - } - - const running_cross_covariance operator+ ( - const running_cross_covariance& item - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT((x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size()) && - (y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size()), - "\t running_cross_covariance running_cross_covariance::operator+()" - << "\n\t The two running_cross_covariance objects being added must have compatible parameters" - << "\n\t x_vector_size(): " << x_vector_size() - << "\n\t item.x_vector_size(): " << item.x_vector_size() - << "\n\t y_vector_size(): " << y_vector_size() - << "\n\t item.y_vector_size(): " << item.y_vector_size() - << "\n\t this: " << this - ); - - running_cross_covariance temp(item); - - // make sure we ignore empty matrices - if (total_count != 0 && temp.total_count != 0) - { - temp.total_cov += total_cov; - temp.sum_x += sum_x; - temp.sum_y += sum_y; - temp.total_count += total_count; - } - else if (total_count != 0) - { - temp.total_cov = total_cov; - temp.sum_x = sum_x; - temp.sum_y = sum_y; - temp.total_count = total_count; - } - - return temp; - } - - - private: - - general_matrix total_cov; - column_matrix sum_x; - column_matrix sum_y; - scalar_type total_count; - - long x_vect_size; - long y_vect_size; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class vector_normalizer - { - public: - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef matrix_type result_type; - - template <typename vector_type> - void train ( - const vector_type& samples - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(samples.size() > 0, - "\tvoid vector_normalizer::train()" - << "\n\tyou have to give a nonempty set of samples to this function" - << "\n\tthis: " << this - ); - - m = mean(mat(samples)); - sd = reciprocal(sqrt(variance(mat(samples)))); - - DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer::train() have infinite or NaN values"); - } - - long in_vector_size ( - ) const - { - return m.nr(); - } - - long out_vector_size ( - ) const - { - return m.nr(); - } - - const matrix_type& means ( - ) const - { - return m; - } - - const matrix_type& std_devs ( - ) const - { - return sd; - } - - const result_type& operator() ( - const matrix_type& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1, - "\tmatrix vector_normalizer::operator()" - << "\n\t you have given invalid arguments to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t x.nc(): " << x.nc() - << "\n\t this: " << this - ); - - temp_out = pointwise_multiply(x-m, sd); - return temp_out; - } - - void swap ( - vector_normalizer& item - ) - { - m.swap(item.m); - sd.swap(item.sd); - temp_out.swap(item.temp_out); - } - - template <typename mt> - friend void deserialize ( - vector_normalizer<mt>& item, - std::istream& in - ); - - template <typename mt> - friend void serialize ( - const vector_normalizer<mt>& item, - std::ostream& out - ); - - private: - - // ------------------- private data members ------------------- - - matrix_type m, sd; - - // This is just a temporary variable that doesn't contribute to the - // state of this object. - mutable matrix_type temp_out; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - inline void swap ( - vector_normalizer<matrix_type>& a, - vector_normalizer<matrix_type>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void deserialize ( - vector_normalizer<matrix_type>& item, - std::istream& in - ) - { - deserialize(item.m, in); - deserialize(item.sd, in); - // Keep deserializing the pca matrix for backwards compatibility. - matrix<double> pca; - deserialize(pca, in); - - if (pca.size() != 0) - throw serialization_error("Error deserializing object of type vector_normalizer\n" - "It looks like a serialized vector_normalizer_pca was accidentally deserialized into \n" - "a vector_normalizer object."); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void serialize ( - const vector_normalizer<matrix_type>& item, - std::ostream& out - ) - { - serialize(item.m, out); - serialize(item.sd, out); - // Keep serializing the pca matrix for backwards compatibility. - matrix<double> pca; - serialize(pca, out); - } - -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class vector_normalizer_pca - { - public: - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef matrix<scalar_type,0,1,mem_manager_type> result_type; - - template <typename vector_type> - void train ( - const vector_type& samples, - const double eps = 0.99 - ) - { - // You are getting an error here because you are trying to apply PCA - // to a vector of fixed length. But PCA is going to try and do - // dimensionality reduction so you can't use a vector with a fixed dimension. - COMPILE_TIME_ASSERT(matrix_type::NR == 0); - - // make sure requires clause is not broken - DLIB_ASSERT(samples.size() > 0, - "\tvoid vector_normalizer_pca::train_pca()" - << "\n\tyou have to give a nonempty set of samples to this function" - << "\n\tthis: " << this - ); - DLIB_ASSERT(0 < eps && eps <= 1, - "\tvoid vector_normalizer_pca::train_pca()" - << "\n\tyou have to give a nonempty set of samples to this function" - << "\n\tthis: " << this - ); - train_pca_impl(mat(samples),eps); - - DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_pca::train() have infinite or NaN values"); - } - - long in_vector_size ( - ) const - { - return m.nr(); - } - - long out_vector_size ( - ) const - { - return pca.nr(); - } - - const matrix<scalar_type,0,1,mem_manager_type>& means ( - ) const - { - return m; - } - - const matrix<scalar_type,0,1,mem_manager_type>& std_devs ( - ) const - { - return sd; - } - - const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix ( - ) const - { - return pca; - } - - const result_type& operator() ( - const matrix_type& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(x.nr() == in_vector_size() && x.nc() == 1, - "\tmatrix vector_normalizer_pca::operator()" - << "\n\t you have given invalid arguments to this function" - << "\n\t x.nr(): " << x.nr() - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t x.nc(): " << x.nc() - << "\n\t this: " << this - ); - - // If we have a pca transform matrix on hand then - // also apply that. - temp_out = pca*pointwise_multiply(x-m, sd); - - return temp_out; - } - - void swap ( - vector_normalizer_pca& item - ) - { - m.swap(item.m); - sd.swap(item.sd); - pca.swap(item.pca); - temp_out.swap(item.temp_out); - } - - template <typename T> - friend void deserialize ( - vector_normalizer_pca<T>& item, - std::istream& in - ); - - template <typename T> - friend void serialize ( - const vector_normalizer_pca<T>& item, - std::ostream& out - ); - - private: - - template <typename mat_type> - void train_pca_impl ( - const mat_type& samples, - const double eps - ) - { - m = mean(samples); - sd = reciprocal(sqrt(variance(samples))); - - // fill x with the normalized version of the input samples - matrix<typename mat_type::type,0,1,mem_manager_type> x(samples); - for (long r = 0; r < x.size(); ++r) - x(r) = pointwise_multiply(x(r)-m, sd); - - matrix<scalar_type,0,0,mem_manager_type> temp, eigen; - matrix<scalar_type,0,1,mem_manager_type> eigenvalues; - - // Compute the svd of the covariance matrix of the normalized inputs - svd(covariance(x), temp, eigen, pca); - eigenvalues = diag(eigen); - - rsort_columns(pca, eigenvalues); - - // figure out how many eigenvectors we want in our pca matrix - const double thresh = sum(eigenvalues)*eps; - long num_vectors = 0; - double total = 0; - for (long r = 0; r < eigenvalues.size() && total < thresh; ++r) - { - ++num_vectors; - total += eigenvalues(r); - } - - // So now we know we want to use num_vectors of the first eigenvectors. So - // pull those out and discard the rest. - pca = trans(colm(pca,range(0,num_vectors-1))); - - // Apply the pca transform to the data in x. Then we will normalize the - // pca matrix below. - for (long r = 0; r < x.nr(); ++r) - { - x(r) = pca*x(r); - } - - // Here we just scale the output features from the pca transform so - // that the variance of each feature is 1. So this doesn't really change - // what the pca is doing, it just makes sure the output features are - // normalized. - pca = trans(scale_columns(trans(pca), reciprocal(sqrt(variance(x))))); - } - - - // ------------------- private data members ------------------- - - matrix<scalar_type,0,1,mem_manager_type> m, sd; - matrix<scalar_type,0,0,mem_manager_type> pca; - - // This is just a temporary variable that doesn't contribute to the - // state of this object. - mutable result_type temp_out; - }; - - template < - typename matrix_type - > - inline void swap ( - vector_normalizer_pca<matrix_type>& a, - vector_normalizer_pca<matrix_type>& b - ) { a.swap(b); } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void deserialize ( - vector_normalizer_pca<matrix_type>& item, - std::istream& in - ) - { - deserialize(item.m, in); - deserialize(item.sd, in); - deserialize(item.pca, in); - if (item.pca.nc() != item.m.nr()) - throw serialization_error("Error deserializing object of type vector_normalizer_pca\n" - "It looks like a serialized vector_normalizer was accidentally deserialized into \n" - "a vector_normalizer_pca object."); - } - - template < - typename matrix_type - > - void serialize ( - const vector_normalizer_pca<matrix_type>& item, - std::ostream& out - ) - { - serialize(item.m, out); - serialize(item.sd, out); - serialize(item.pca, out); - } - -// ---------------------------------------------------------------------------------------- - - inline double binomial_random_vars_are_different ( - uint64_t k1, - uint64_t n1, - uint64_t k2, - uint64_t n2 - ) - { - DLIB_ASSERT(k1 <= n1, "k1: "<< k1 << " n1: "<< n1); - DLIB_ASSERT(k2 <= n2, "k2: "<< k2 << " n2: "<< n2); - - const double p1 = k1/(double)n1; - const double p2 = k2/(double)n2; - const double p = (k1+k2)/(double)(n1+n2); - - auto ll = [](double p, uint64_t k, uint64_t n) { - if (p == 0 || p == 1) - return 0.0; - return k*std::log(p) + (n-k)*std::log(1-p); - }; - - auto logll = ll(p1,k1,n1) + ll(p2,k2,n2) - ll(p,k1,n1) - ll(p,k2,n2); - - // The basic statistic only tells you if the random variables are different. But - // it's nice to know which way they are different, i.e., which one is bigger. So - // stuff that information into the sign bit of the return value. - if (p1>=p2) - return logll; - else - return -logll; - } - -// ---------------------------------------------------------------------------------------- - - inline double event_correlation ( - uint64_t A_count, - uint64_t B_count, - uint64_t AB_count, - uint64_t total_num_observations - ) - { - DLIB_ASSERT(AB_count <= A_count && A_count <= total_num_observations, - "AB_count: " << AB_count << ", A_count: "<< A_count << ", total_num_observations: " << total_num_observations); - DLIB_ASSERT(AB_count <= B_count && B_count <= total_num_observations, - "AB_count: " << AB_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations); - - if (total_num_observations == 0) - return 0; - - DLIB_ASSERT(A_count + B_count - AB_count <= total_num_observations, - "AB_count: " << AB_count << " A_count: " << A_count << ", B_count: "<< B_count << ", total_num_observations: " << total_num_observations); - - - const auto AnotB_count = A_count - AB_count; - const auto notB_count = total_num_observations - B_count; - // How likely is it that the odds of A happening is different when conditioned on - // whether or not B happened? - return binomial_random_vars_are_different( - AB_count, B_count, // A conditional on the presence of B - AnotB_count, notB_count // A conditional on the absence of B - ); - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STATISTICs_ - diff --git a/ml/dlib/dlib/statistics/statistics_abstract.h b/ml/dlib/dlib/statistics/statistics_abstract.h deleted file mode 100644 index ef8f13802..000000000 --- a/ml/dlib/dlib/statistics/statistics_abstract.h +++ /dev/null @@ -1,1387 +0,0 @@ -// Copyright (C) 2008 Davis E. King (davis@dlib.net), Steve Taylor -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_STATISTICs_ABSTRACT_ -#ifdef DLIB_STATISTICs_ABSTRACT_ - -#include <limits> -#include <cmath> -#include "../matrix/matrix_abstract.h" -#include "../svm/sparse_vector_abstract.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double mean_sign_agreement ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ); - /*! - requires - - a.size() == b.size() - ensures - - returns the number of times a[i] has the same sign as b[i] divided by - a.size(). So we return the probability that elements of a and b have - the same sign. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double correlation ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ); - /*! - requires - - a.size() == b.size() - - a.size() > 1 - ensures - - returns the correlation coefficient between all the elements of a and b. - (i.e. how correlated is a(i) with b(i)) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double covariance ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ); - /*! - requires - - a.size() == b.size() - - a.size() > 1 - ensures - - returns the covariance between all the elements of a and b. - (i.e. how does a(i) vary with b(i)) - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double r_squared ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ); - /*! - requires - - a.size() == b.size() - - a.size() > 1 - ensures - - returns the R^2 coefficient of determination between all the elements of a and b. - This value is just the square of correlation(a,b). - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T, - typename alloc - > - double mean_squared_error ( - const std::vector<T,alloc>& a, - const std::vector<T,alloc>& b - ); - /*! - requires - - a.size() == b.size() - ensures - - returns the mean squared error between all the elements of a and b. - (i.e. mean(squared(mat(a)-mat(b)))) - !*/ - -// ---------------------------------------------------------------------------------------- - - double binomial_random_vars_are_different ( - uint64_t k1, - uint64_t n1, - uint64_t k2, - uint64_t n2 - ); - /*! - requires - - k1 <= n1 - - k2 <= n2 - ensures - - Given two binomially distributed random variables, X1 and X2, we want to know - if these variables have the same parameter (i.e. the chance of "success"). - So assume that: - - You observed X1 to give k1 successes out of n1 trials. - - You observed X2 to give k2 successes out of n2 trials. - - This function performs a simple likelihood ratio test to determine if X1 and - X2 have the same parameter. The return value of this function will be: - - Close to 0 if they are probably the same. - - Larger than 0 if X1 probably has a higher "success" rate than X2. - - Smaller than 0 if X2 probably has a higher "success" rate than X1. - Moreover, the larger the absolute magnitude of the return value the more - likely it is that X1 and X2 have different distributions. - - For a discussion of the technique and applications see: - Dunning, Ted. "Accurate methods for the statistics of surprise and - coincidence." Computational linguistics 19.1 (1993): 61-74. - !*/ - -// ---------------------------------------------------------------------------------------- - - double event_correlation ( - uint64_t A_count, - uint64_t B_count, - uint64_t AB_count, - uint64_t total_num_observations - ); - /*! - requires - - AB_count <= A_count <= total_num_observations - - AB_count <= B_count <= total_num_observations - - A_count + B_count - AB_count <= total_num_observations - ensures - - This function does a statistical test to determine if two events co-occur in - a statistically significant way. In particular, we assume you performed - total_num_observations measurements and during those measurements you: - - Observed event A to happen A_count times. - - Observed event B to happen B_count times. - - Observed AB_count co-occurrences of the events. That is, AB_count is the - number of times the events happened together during the same measurement. - - This function returns a number, COR, which can take any real value. It has - the following interpretations: - - COR == 0: there is no evidence of correlation between the two events. - They appear to be unrelated. - - COR > 0: There is evidence that A and B co-occur together. That is, - they happen at the same times more often than you would expect if they - were independent events. The larger the magnitude of COR the more - evidence we have for the correlation. - - COR < 0: There is evidence that A and B are anti-correlated. That is, - when A happens B is unlikely to happen and vise versa. The larger the - magnitude of COR the more evidence we have for the anti-correlation. - - This function implements the simple likelihood ratio test discussed in the - following paper: - Dunning, Ted. "Accurate methods for the statistics of surprise and - coincidence." Computational linguistics 19.1 (1993): 61-74. - So for an extended discussion of the method see the above paper. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_stats - { - /*! - REQUIREMENTS ON T - - T must be a float, double, or long double type - - INITIAL VALUE - - mean() == 0 - - current_n() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents something that can compute the running mean, - variance, skewness, and excess kurtosis of a stream of real numbers. - !*/ - public: - - running_stats( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear( - ); - /*! - ensures - - this object has its initial value - - clears all memory of any previous data points - !*/ - - T current_n ( - ) const; - /*! - ensures - - returns the number of points given to this object so far. - !*/ - - void add ( - const T& val - ); - /*! - ensures - - updates the mean, variance, skewness, and kurtosis stored in this object - so that the new value is factored into them. - - #mean() == mean()*current_n()/(current_n()+1) + val/(current_n()+1). - (i.e. the updated mean value that takes the new value into account) - - #variance() == the updated variance that takes this new value into account. - - #skewness() == the updated skewness that takes this new value into account. - - #ex_kurtosis() == the updated kurtosis that takes this new value into account. - - #current_n() == current_n() + 1 - !*/ - - T mean ( - ) const; - /*! - ensures - - returns the mean of all the values presented to this object - so far. - !*/ - - T variance ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sample variance of all the values presented to this - object so far. - !*/ - - T stddev ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sampled standard deviation of all the values - presented to this object so far. - !*/ - - T skewness ( - ) const; - /*! - requires - - current_n() > 2 - ensures - - returns the unbiased sample skewness of all the values presented - to this object so far. - !*/ - - T ex_kurtosis( - ) const; - /*! - requires - - current_n() > 3 - ensures - - returns the unbiased sample kurtosis of all the values presented - to this object so far. - !*/ - - T max ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the largest value presented to this object so far. - !*/ - - T min ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the smallest value presented to this object so far. - !*/ - - T scale ( - const T& val - ) const; - /*! - requires - - current_n() > 1 - ensures - - return (val-mean())/stddev(); - !*/ - - running_stats operator+ ( - const running_stats& rhs - ) const; - /*! - ensures - - returns a new running_stats object that represents the combination of all - the values given to *this and rhs. That is, this function returns a - running_stats object, R, that is equivalent to what you would obtain if - all calls to this->add() and rhs.add() had instead been done to R. - !*/ - }; - - template <typename T> - void serialize ( - const running_stats<T>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template <typename T> - void deserialize ( - running_stats<T>& item, - std::istream& in - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_scalar_covariance - { - /*! - REQUIREMENTS ON T - - T must be a float, double, or long double type - - INITIAL VALUE - - mean_x() == 0 - - mean_y() == 0 - - current_n() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents something that can compute the running covariance - of a stream of real number pairs. - !*/ - - public: - - running_scalar_covariance( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear( - ); - /*! - ensures - - this object has its initial value - - clears all memory of any previous data points - !*/ - - void add ( - const T& x, - const T& y - ); - /*! - ensures - - updates the statistics stored in this object so that - the new pair (x,y) is factored into them. - - #current_n() == current_n() + 1 - !*/ - - T current_n ( - ) const; - /*! - ensures - - returns the number of points given to this object so far. - !*/ - - T mean_x ( - ) const; - /*! - ensures - - returns the mean value of all x samples presented to this object - via add(). - !*/ - - T mean_y ( - ) const; - /*! - ensures - - returns the mean value of all y samples presented to this object - via add(). - !*/ - - T covariance ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the covariance between all the x and y samples presented - to this object via add() - !*/ - - T correlation ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the correlation coefficient between all the x and y samples - presented to this object via add() - !*/ - - T variance_x ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sample variance value of all x samples presented - to this object via add(). - !*/ - - T variance_y ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sample variance value of all y samples presented - to this object via add(). - !*/ - - T stddev_x ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sample standard deviation of all x samples - presented to this object via add(). - !*/ - - T stddev_y ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sample standard deviation of all y samples - presented to this object via add(). - !*/ - - running_scalar_covariance operator+ ( - const running_covariance& rhs - ) const; - /*! - ensures - - returns a new running_scalar_covariance object that represents the - combination of all the values given to *this and rhs. That is, this - function returns a running_scalar_covariance object, R, that is - equivalent to what you would obtain if all calls to this->add() and - rhs.add() had instead been done to R. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_scalar_covariance_decayed - { - /*! - REQUIREMENTS ON T - - T must be a float, double, or long double type - - INITIAL VALUE - - mean_x() == 0 - - mean_y() == 0 - - current_n() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents something that can compute the running covariance of - a stream of real number pairs. It is essentially the same as - running_scalar_covariance except that it forgets about data it has seen - after a certain period of time. It does this by exponentially decaying old - statistics. - !*/ - - public: - - running_scalar_covariance_decayed( - T decay_halflife = 1000 - ); - /*! - requires - - decay_halflife > 0 - ensures - - #forget_factor() == std::pow(0.5, 1/decay_halflife); - (i.e. after decay_halflife calls to add() the data given to the first add - will be down weighted by 0.5 in the statistics stored in this object). - !*/ - - T forget_factor ( - ) const; - /*! - ensures - - returns the exponential forget factor used to forget old statistics when - add() is called. - !*/ - - void add ( - const T& x, - const T& y - ); - /*! - ensures - - updates the statistics stored in this object so that - the new pair (x,y) is factored into them. - - #current_n() == current_n()*forget_factor() + forget_factor() - - Down weights old statistics by a factor of forget_factor(). - !*/ - - T current_n ( - ) const; - /*! - ensures - - returns the effective number of points given to this object. As add() - is called this value will converge to a constant, the value of which is - based on the decay_halflife supplied to the constructor. - !*/ - - T mean_x ( - ) const; - /*! - ensures - - returns the mean value of all x samples presented to this object - via add(). - !*/ - - T mean_y ( - ) const; - /*! - ensures - - returns the mean value of all y samples presented to this object - via add(). - !*/ - - T covariance ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the covariance between all the x and y samples presented - to this object via add() - !*/ - - T correlation ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the correlation coefficient between all the x and y samples - presented to this object via add() - !*/ - - T variance_x ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the sample variance value of all x samples presented - to this object via add(). - !*/ - - T variance_y ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the sample variance value of all y samples presented - to this object via add(). - !*/ - - T stddev_x ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the sample standard deviation of all x samples - presented to this object via add(). - !*/ - - T stddev_y ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the sample standard deviation of all y samples - presented to this object via add(). - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename T - > - class running_stats_decayed - { - /*! - REQUIREMENTS ON T - - T must be a float, double, or long double type - - INITIAL VALUE - - mean() == 0 - - current_n() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents something that can compute the running mean and - variance of a stream of real numbers. It is similar to running_stats - except that it forgets about data it has seen after a certain period of - time. It does this by exponentially decaying old statistics. - !*/ - - public: - - running_stats_decayed( - T decay_halflife = 1000 - ); - /*! - requires - - decay_halflife > 0 - ensures - - #forget_factor() == std::pow(0.5, 1/decay_halflife); - (i.e. after decay_halflife calls to add() the data given to the first add - will be down weighted by 0.5 in the statistics stored in this object). - !*/ - - T forget_factor ( - ) const; - /*! - ensures - - returns the exponential forget factor used to forget old statistics when - add() is called. - !*/ - - void add ( - const T& x - ); - /*! - ensures - - updates the statistics stored in this object so that x is factored into - them. - - #current_n() == current_n()*forget_factor() + forget_factor() - - Down weights old statistics by a factor of forget_factor(). - !*/ - - T current_n ( - ) const; - /*! - ensures - - returns the effective number of points given to this object. As add() - is called this value will converge to a constant, the value of which is - based on the decay_halflife supplied to the constructor. - !*/ - - T mean ( - ) const; - /*! - ensures - - returns the mean value of all x samples presented to this object - via add(). - !*/ - - T variance ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the sample variance value of all x samples presented to this - object via add(). - !*/ - - T stddev ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the sample standard deviation of all x samples presented to this - object via add(). - !*/ - - }; - - template <typename T> - void serialize ( - const running_stats_decayed<T>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - - template <typename T> - void deserialize ( - running_stats_decayed<T>& item, - std::istream& in - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class running_covariance - { - /*! - REQUIREMENTS ON matrix_type - Must be some type of dlib::matrix. - - INITIAL VALUE - - in_vector_size() == 0 - - current_n() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is a simple tool for computing the mean and - covariance of a sequence of vectors. - !*/ - public: - - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef typename matrix_type::layout_type layout_type; - typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; - typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; - - running_covariance( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear( - ); - /*! - ensures - - this object has its initial value - - clears all memory of any previous data points - !*/ - - long current_n ( - ) const; - /*! - ensures - - returns the number of samples that have been presented to this object - !*/ - - long in_vector_size ( - ) const; - /*! - ensures - - if (this object has been presented with any input vectors or - set_dimension() has been called) then - - returns the dimension of the column vectors used with this object - - else - - returns 0 - !*/ - - void set_dimension ( - long size - ); - /*! - requires - - size > 0 - ensures - - #in_vector_size() == size - - #current_n() == 0 - !*/ - - template <typename T> - void add ( - const T& val - ); - /*! - requires - - val must represent a column vector. It can either be a dlib::matrix - object or some kind of unsorted sparse vector type. See the top of - dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector. - - val must have a number of dimensions which is compatible with the current - setting of in_vector_size(). In particular, this means that the - following must hold: - - if (val is a dlib::matrix) then - - in_vector_size() == 0 || val.size() == val_vector_size() - - else - - max_index_plus_one(val) <= in_vector_size() - - in_vector_size() > 0 - (i.e. you must call set_dimension() prior to calling add() if - you want to use sparse vectors.) - ensures - - updates the mean and covariance stored in this object so that - the new value is factored into them. - - if (val is a dlib::matrix) then - - #in_vector_size() == val.size() - !*/ - - const column_matrix mean ( - ) const; - /*! - requires - - in_vector_size() != 0 - ensures - - returns the mean of all the vectors presented to this object - so far. - !*/ - - const general_matrix covariance ( - ) const; - /*! - requires - - in_vector_size() != 0 - - current_n() > 1 - ensures - - returns the unbiased sample covariance matrix for all the vectors - presented to this object so far. - !*/ - - const running_covariance operator+ ( - const running_covariance& item - ) const; - /*! - requires - - in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size() - (i.e. the in_vector_size() of *this and item must match or one must be zero) - ensures - - returns a new running_covariance object that represents the combination of all - the vectors given to *this and item. That is, this function returns a - running_covariance object, R, that is equivalent to what you would obtain if all - calls to this->add() and item.add() had instead been done to R. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class running_cross_covariance - { - /*! - REQUIREMENTS ON matrix_type - Must be some type of dlib::matrix. - - INITIAL VALUE - - x_vector_size() == 0 - - y_vector_size() == 0 - - current_n() == 0 - - WHAT THIS OBJECT REPRESENTS - This object is a simple tool for computing the mean and cross-covariance - matrices of a sequence of pairs of vectors. - !*/ - - public: - - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef typename matrix_type::layout_type layout_type; - typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix; - typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix; - - running_cross_covariance( - ); - /*! - ensures - - this object is properly initialized - !*/ - - void clear( - ); - /*! - ensures - - This object has its initial value. - - Clears all memory of any previous data points. - !*/ - - long x_vector_size ( - ) const; - /*! - ensures - - if (this object has been presented with any input vectors or - set_dimensions() has been called) then - - returns the dimension of the x vectors given to this object via add(). - - else - - returns 0 - !*/ - - long y_vector_size ( - ) const; - /*! - ensures - - if (this object has been presented with any input vectors or - set_dimensions() has been called) then - - returns the dimension of the y vectors given to this object via add(). - - else - - returns 0 - !*/ - - void set_dimensions ( - long x_size, - long y_size - ); - /*! - requires - - x_size > 0 - - y_size > 0 - ensures - - #x_vector_size() == x_size - - #y_vector_size() == y_size - - #current_n() == 0 - !*/ - - long current_n ( - ) const; - /*! - ensures - - returns the number of samples that have been presented to this object. - !*/ - - template <typename T, typename U> - void add ( - const T& x, - const U& y - ); - /*! - requires - - x and y must represent column vectors. They can either be dlib::matrix - objects or some kind of unsorted sparse vector type. See the top of - dlib/svm/sparse_vector_abstract.h for a definition of unsorted sparse vector. - - x and y must have a number of dimensions which is compatible with the - current setting of x_vector_size() and y_vector_size(). In particular, - this means that the following must hold: - - if (x or y is a sparse vector type) then - - x_vector_size() > 0 && y_vector_size() > 0 - (i.e. you must call set_dimensions() prior to calling add() if - you want to use sparse vectors.) - - if (x is a dlib::matrix) then - - x_vector_size() == 0 || x.size() == x_vector_size() - - else - - max_index_plus_one(x) <= x_vector_size() - - if (y is a dlib::matrix) then - - y_vector_size() == 0 || y.size() == y_vector_size() - - else - - max_index_plus_one(y) <= y_vector_size() - ensures - - updates the mean and cross-covariance matrices stored in this object so - that the new (x,y) vector pair is factored into them. - - if (x is a dlib::matrix) then - - #x_vector_size() == x.size() - - if (y is a dlib::matrix) then - - #y_vector_size() == y.size() - !*/ - - const column_matrix mean_x ( - ) const; - /*! - requires - - current_n() != 0 - ensures - - returns the mean of all the x vectors presented to this object so far. - - The returned vector will have x_vector_size() dimensions. - !*/ - - const column_matrix mean_y ( - ) const; - /*! - requires - - current_n() != 0 - ensures - - returns the mean of all the y vectors presented to this object so far. - - The returned vector will have y_vector_size() dimensions. - !*/ - - const general_matrix covariance_xy ( - ) const; - /*! - requires - - current_n() > 1 - ensures - - returns the unbiased sample cross-covariance matrix for all the vector - pairs presented to this object so far. In particular, returns a matrix - M such that: - - M.nr() == x_vector_size() - - M.nc() == y_vector_size() - - M == the cross-covariance matrix of the data given to add(). - !*/ - - const running_cross_covariance operator+ ( - const running_cross_covariance& item - ) const; - /*! - requires - - x_vector_size() == 0 || item.x_vector_size() == 0 || x_vector_size() == item.x_vector_size() - (i.e. the x_vector_size() of *this and item must match or one must be zero) - - y_vector_size() == 0 || item.y_vector_size() == 0 || y_vector_size() == item.y_vector_size() - (i.e. the y_vector_size() of *this and item must match or one must be zero) - ensures - - returns a new running_cross_covariance object that represents the - combination of all the vectors given to *this and item. That is, this - function returns a running_cross_covariance object, R, that is equivalent - to what you would obtain if all calls to this->add() and item.add() had - instead been done to R. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class vector_normalizer - { - /*! - REQUIREMENTS ON matrix_type - - must be a dlib::matrix object capable of representing column - vectors - - INITIAL VALUE - - in_vector_size() == 0 - - out_vector_size() == 0 - - means().size() == 0 - - std_devs().size() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents something that can learn to normalize a set - of column vectors. In particular, normalized column vectors should - have zero mean and a variance of one. - - Also, if desired, this object can use principal component - analysis for the purposes of reducing the number of elements in a - vector. - - THREAD SAFETY - Note that this object contains a cached matrix object it uses - to store intermediate results for normalization. This avoids - needing to reallocate it every time this object performs normalization - but also makes it non-thread safe. So make sure you don't share - instances of this object between threads. - !*/ - - public: - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef matrix_type result_type; - - template <typename vector_type> - void train ( - const vector_type& samples - ); - /*! - requires - - samples.size() > 0 - - samples == a column matrix or something convertible to a column - matrix via mat(). Also, x should contain - matrix_type objects that represent nonempty column vectors. - - samples does not contain any infinite or NaN values - ensures - - #in_vector_size() == samples(0).nr() - - #out_vector_size() == samples(0).nr() - - This object has learned how to normalize vectors that look like - vectors in the given set of samples. - - #means() == mean(samples) - - #std_devs() == reciprocal(sqrt(variance(samples))); - !*/ - - long in_vector_size ( - ) const; - /*! - ensures - - returns the number of rows that input vectors are - required to contain if they are to be normalized by - this object. - !*/ - - long out_vector_size ( - ) const; - /*! - ensures - - returns the number of rows in the normalized vectors - that come out of this object. - !*/ - - const matrix_type& means ( - ) const; - /*! - ensures - - returns a matrix M such that: - - M.nc() == 1 - - M.nr() == in_vector_size() - - M(i) == the mean of the ith input feature shown to train() - !*/ - - const matrix_type& std_devs ( - ) const; - /*! - ensures - - returns a matrix SD such that: - - SD.nc() == 1 - - SD.nr() == in_vector_size() - - SD(i) == the reciprocal of the standard deviation of the ith - input feature shown to train() - !*/ - - const result_type& operator() ( - const matrix_type& x - ) const; - /*! - requires - - x.nr() == in_vector_size() - - x.nc() == 1 - ensures - - returns a normalized version of x, call it Z, that has the - following properties: - - Z.nr() == out_vector_size() - - Z.nc() == 1 - - the mean of each element of Z is 0 - - the variance of each element of Z is 1 - - Z == pointwise_multiply(x-means(), std_devs()); - !*/ - - void swap ( - vector_normalizer& item - ); - /*! - ensures - - swaps *this and item - !*/ - }; - - template < - typename matrix_type - > - inline void swap ( - vector_normalizer<matrix_type>& a, - vector_normalizer<matrix_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename matrix_type, - > - void deserialize ( - vector_normalizer<matrix_type>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - - template < - typename matrix_type, - > - void serialize ( - const vector_normalizer<matrix_type>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class vector_normalizer_pca - { - /*! - REQUIREMENTS ON matrix_type - - must be a dlib::matrix object capable of representing column - vectors - - INITIAL VALUE - - in_vector_size() == 0 - - out_vector_size() == 0 - - means().size() == 0 - - std_devs().size() == 0 - - pca_matrix().size() == 0 - - WHAT THIS OBJECT REPRESENTS - This object represents something that can learn to normalize a set - of column vectors. In particular, normalized column vectors should - have zero mean and a variance of one. - - Also, this object uses principal component analysis for the purposes - of reducing the number of elements in a vector. - - THREAD SAFETY - Note that this object contains a cached matrix object it uses - to store intermediate results for normalization. This avoids - needing to reallocate it every time this object performs normalization - but also makes it non-thread safe. So make sure you don't share - instances of this object between threads. - !*/ - - public: - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef matrix<scalar_type,0,1,mem_manager_type> result_type; - - template <typename vector_type> - void train ( - const vector_type& samples, - const double eps = 0.99 - ); - /*! - requires - - 0 < eps <= 1 - - samples.size() > 0 - - samples == a column matrix or something convertible to a column - matrix via mat(). Also, x should contain - matrix_type objects that represent nonempty column vectors. - - samples does not contain any infinite or NaN values - ensures - - This object has learned how to normalize vectors that look like - vectors in the given set of samples. - - Principal component analysis is performed to find a transform - that might reduce the number of output features. - - #in_vector_size() == samples(0).nr() - - 0 < #out_vector_size() <= samples(0).nr() - - eps is a number that controls how "lossy" the pca transform will be. - Large values of eps result in #out_vector_size() being larger and - smaller values of eps result in #out_vector_size() being smaller. - - #means() == mean(samples) - - #std_devs() == reciprocal(sqrt(variance(samples))); - - #pca_matrix() == the PCA transform matrix that is out_vector_size() - rows by in_vector_size() columns. - !*/ - - long in_vector_size ( - ) const; - /*! - ensures - - returns the number of rows that input vectors are - required to contain if they are to be normalized by - this object. - !*/ - - long out_vector_size ( - ) const; - /*! - ensures - - returns the number of rows in the normalized vectors - that come out of this object. - !*/ - - const matrix<scalar_type,0,1,mem_manager_type>& means ( - ) const; - /*! - ensures - - returns a matrix M such that: - - M.nc() == 1 - - M.nr() == in_vector_size() - - M(i) == the mean of the ith input feature shown to train() - !*/ - - const matrix<scalar_type,0,1,mem_manager_type>& std_devs ( - ) const; - /*! - ensures - - returns a matrix SD such that: - - SD.nc() == 1 - - SD.nr() == in_vector_size() - - SD(i) == the reciprocal of the standard deviation of the ith - input feature shown to train() - !*/ - - const matrix<scalar_type,0,0,mem_manager_type>& pca_matrix ( - ) const; - /*! - ensures - - returns a matrix PCA such that: - - PCA.nr() == out_vector_size() - - PCA.nc() == in_vector_size() - - PCA == the principal component analysis transformation - matrix - !*/ - - const result_type& operator() ( - const matrix_type& x - ) const; - /*! - requires - - x.nr() == in_vector_size() - - x.nc() == 1 - ensures - - returns a normalized version of x, call it Z, that has the - following properties: - - Z.nr() == out_vector_size() - - Z.nc() == 1 - - the mean of each element of Z is 0 - - the variance of each element of Z is 1 - - Z == pca_matrix()*pointwise_multiply(x-means(), std_devs()); - !*/ - - void swap ( - vector_normalizer_pca& item - ); - /*! - ensures - - swaps *this and item - !*/ - }; - - template < - typename matrix_type - > - inline void swap ( - vector_normalizer_pca<matrix_type>& a, - vector_normalizer_pca<matrix_type>& b - ) { a.swap(b); } - /*! - provides a global swap function - !*/ - - template < - typename matrix_type, - > - void deserialize ( - vector_normalizer_pca<matrix_type>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - - template < - typename matrix_type, - > - void serialize ( - const vector_normalizer_pca<matrix_type>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_STATISTICs_ABSTRACT_ - diff --git a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h deleted file mode 100644 index 690370f80..000000000 --- a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric.h +++ /dev/null @@ -1,618 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#ifndef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_ -#define DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_ - -#include "vector_normalizer_frobmetric_abstract.h" -#include "../matrix.h" -#include "../optimization.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - struct frobmetric_training_sample - { - matrix_type anchor_vect; - std::vector<matrix_type> near_vects; - std::vector<matrix_type> far_vects; - - unsigned long num_triples ( - ) const { return near_vects.size() * far_vects.size(); } - - void clear() - { - near_vects.clear(); - far_vects.clear(); - } - }; - - template < - typename matrix_type - > - void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out) - { - int version = 1; - serialize(version, out); - serialize(item.anchor_vect, out); - serialize(item.near_vects, out); - serialize(item.far_vects, out); - } - - template < - typename matrix_type - > - void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in) - { - int version = 0; - deserialize(version, in); - if (version != 1) - throw serialization_error("Unexpected version found while deserializing dlib::frobmetric_training_sample."); - deserialize(item.anchor_vect, in); - deserialize(item.near_vects, in); - deserialize(item.far_vects, in); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class vector_normalizer_frobmetric - { - - public: - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef matrix_type result_type; - - private: - struct compact_frobmetric_training_sample - { - std::vector<matrix_type> near_vects; - std::vector<matrix_type> far_vects; - }; - - struct objective - { - objective ( - const std::vector<compact_frobmetric_training_sample>& samples_, - matrix<double,0,0,mem_manager_type>& Aminus_, - const matrix<double,0,1,mem_manager_type>& bias_ - ) : samples(samples_), Aminus(Aminus_), bias(bias_) {} - - double operator()(const matrix<double,0,1,mem_manager_type>& u) const - { - long idx = 0; - const long dims = samples[0].far_vects[0].size(); - // Here we compute \hat A from the paper, which we refer to as just A in - // the code. - matrix<double,0,0,mem_manager_type> A(dims,dims); - A = 0; - std::vector<double> ufar, unear; - for (unsigned long i = 0; i < samples.size(); ++i) - { - ufar.assign(samples[i].far_vects.size(),0); - unear.assign(samples[i].near_vects.size(),0); - for (unsigned long j = 0; j < unear.size(); ++j) - { - for (unsigned long k = 0; k < ufar.size(); ++k) - { - const double val = u(idx++); - ufar[k] -= val; - unear[j] += val; - } - } - for (unsigned long j = 0; j < unear.size(); ++j) - A += unear[j]*samples[i].near_vects[j]*trans(samples[i].near_vects[j]); - for (unsigned long j = 0; j < ufar.size(); ++j) - A += ufar[j]*samples[i].far_vects[j]*trans(samples[i].far_vects[j]); - } - - eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(A)); - Aminus = ed.get_pseudo_v()*diagm(upperbound(ed.get_real_eigenvalues(),0))*trans(ed.get_pseudo_v()); - // Do this to avoid numeric instability later on since the above - // computation can make Aminus slightly non-symmetric. - Aminus = make_symmetric(Aminus); - - return dot(u,bias) - 0.5*sum(squared(Aminus)); - } - - private: - const std::vector<compact_frobmetric_training_sample>& samples; - matrix<double,0,0,mem_manager_type>& Aminus; - const matrix<double,0,1,mem_manager_type>& bias; - }; - - struct derivative - { - derivative ( - unsigned long num_triples_, - const std::vector<compact_frobmetric_training_sample>& samples_, - matrix<double,0,0,mem_manager_type>& Aminus_, - const matrix<double,0,1,mem_manager_type>& bias_ - ) : num_triples(num_triples_), samples(samples_), Aminus(Aminus_), bias(bias_) {} - - matrix<double,0,1,mem_manager_type> operator()(const matrix<double,0,1,mem_manager_type>& ) const - { - // Note that Aminus is a function of u (the argument to this function), but - // since Aminus will have been computed already by the most recent call to - // the objective function we don't need to do anything with u. We can just - // use Aminus right away. - matrix<double,0,1,mem_manager_type> grad(num_triples); - - long idx = 0; - std::vector<double> ufar, unear; - for (unsigned long i = 0; i < samples.size(); ++i) - { - ufar.resize(samples[i].far_vects.size()); - unear.resize(samples[i].near_vects.size()); - - for (unsigned long j = 0; j < unear.size(); ++j) - unear[j] = sum(pointwise_multiply(Aminus, samples[i].near_vects[j]*trans(samples[i].near_vects[j]))); - for (unsigned long j = 0; j < ufar.size(); ++j) - ufar[j] = sum(pointwise_multiply(Aminus, samples[i].far_vects[j]*trans(samples[i].far_vects[j]))); - - for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) - { - for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k) - { - grad(idx) = bias(idx) + ufar[k]-unear[j]; - idx++; - } - } - } - - return grad; - } - - private: - const unsigned long num_triples; - const std::vector<compact_frobmetric_training_sample>& samples; - matrix<double,0,0,mem_manager_type>& Aminus; - const matrix<double,0,1,mem_manager_type>& bias; - }; - - - class custom_stop_strategy - { - public: - custom_stop_strategy( - double C_, - double eps_, - bool be_verbose_, - unsigned long max_iter_ - ) - { - _c = C_; - - _cur_iter = 0; - _gradient_thresh = eps_; - _max_iter = max_iter_; - _verbose = be_verbose_; - } - - template <typename T> - bool should_continue_search ( - const T& u, - const double , - const T& grad - ) - { - ++_cur_iter; - - double max_gradient = 0; - for (long i = 0; i < grad.size(); ++i) - { - const bool at_lower_bound = (0 >= u(i) && grad(i) > 0); - const bool at_upper_bound = (_c/grad.size() <= u(i) && grad(i) < 0); - if (!at_lower_bound && !at_upper_bound) - max_gradient = std::max(std::abs(grad(i)), max_gradient); - } - - if (_verbose) - { - std::cout << "iteration: " << _cur_iter << " max_gradient: "<< max_gradient << std::endl; - } - - // Only stop when the largest non-bound-constrained element of the gradient - // is lower than the threshold. - if (max_gradient < _gradient_thresh) - return false; - - // Check if we have hit the max allowable number of iterations. - if (_cur_iter > _max_iter) - { - return false; - } - - return true; - } - - private: - bool _verbose; - - unsigned long _max_iter; - unsigned long _cur_iter; - double _c; - double _gradient_thresh; - }; - - public: - vector_normalizer_frobmetric ( - ) - { - verbose = false; - eps = 0.1; - C = 1; - max_iter = 5000; - _use_identity_matrix_prior = false; - } - - bool uses_identity_matrix_prior ( - ) const - { - return _use_identity_matrix_prior; - } - - void set_uses_identity_matrix_prior ( - bool use_prior - ) - { - _use_identity_matrix_prior = use_prior; - } - - void be_verbose( - ) - { - verbose = true; - } - - void set_epsilon ( - double eps_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(eps_ > 0, - "\t void vector_normalizer_frobmetric::set_epsilon(eps_)" - << "\n\t invalid inputs were given to this function" - << "\n\t eps: " << eps_ - ); - eps = eps_; - } - - double get_epsilon ( - ) const - { - return eps; - } - - void set_c ( - double C_ - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(C_ > 0, - "\t void vector_normalizer_frobmetric::set_c()" - << "\n\t C_ must be greater than 0" - << "\n\t C_: " << C_ - << "\n\t this: " << this - ); - - C = C_; - } - - void set_max_iterations ( - unsigned long max_iterations - ) - { - max_iter = max_iterations; - } - - unsigned long get_max_iterations ( - ) const - { - return max_iter; - } - - double get_c ( - ) const - { - return C; - } - - void be_quiet ( - ) - { - verbose = false; - } - - void train ( - const std::vector<frobmetric_training_sample<matrix_type> >& samples - ) - { - // make sure requires clause is not broken - DLIB_ASSERT(samples.size() > 0, - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t you have to give a nonempty set of samples to this function" - ); -#ifdef ENABLE_ASSERTS - { - const long dims = samples[0].anchor_vect.size(); - DLIB_ASSERT(dims != 0, - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t The dimension of the input vectors can't be zero." - ); - for (unsigned long i = 0; i < samples.size(); ++i) - { - DLIB_ASSERT(is_col_vector(samples[i].anchor_vect), - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t i: " << i - ); - DLIB_ASSERT(samples[i].anchor_vect.size() == dims, - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t i: " << i - << "\n\t dims: " << dims - << "\n\t samples[i].anchor_vect.size(): " << samples[i].anchor_vect.size() - ); - - DLIB_ASSERT(samples[i].num_triples() != 0, - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t It is illegal for a training sample to have no data in it" - << "\n\t i: " << i - ); - for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) - { - DLIB_ASSERT(is_col_vector(samples[i].near_vects[j]), - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t i: " << i - << "\n\t j: " << j - ); - DLIB_ASSERT(samples[i].near_vects[j].size() == dims, - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t i: " << i - << "\n\t j: " << j - << "\n\t dims: " << dims - << "\n\t samples[i].near_vects[j].size(): " << samples[i].near_vects[j].size() - ); - } - for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j) - { - DLIB_ASSERT(is_col_vector(samples[i].far_vects[j]), - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t i: " << i - << "\n\t j: " << j - ); - DLIB_ASSERT(samples[i].far_vects[j].size() == dims, - "\tvoid vector_normalizer_frobmetric::train()" - << "\n\t Invalid inputs were given to this function." - << "\n\t i: " << i - << "\n\t j: " << j - << "\n\t dims: " << dims - << "\n\t samples[i].far_vects[j].size(): " << samples[i].far_vects[j].size() - ); - } - } - } -#endif // end ENABLE_ASSERTS - - - // compute the mean sample - m = 0; - for (unsigned long i = 0; i < samples.size(); ++i) - m += samples[i].anchor_vect; - m /= samples.size(); - - DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_frobmetric::train() have infinite or NaN values"); - - // Now we need to find tform. So we setup the optimization problem and run it - // over the next few lines of code. - unsigned long num_triples = 0; - for (unsigned long i = 0; i < samples.size(); ++i) - num_triples += samples[i].near_vects.size()*samples[i].far_vects.size(); - - matrix<double,0,1,mem_manager_type> u(num_triples); - matrix<double,0,1,mem_manager_type> bias(num_triples); - u = 0; - bias = 1; - - - // precompute all the anchor_vect to far_vects/near_vects pairs - std::vector<compact_frobmetric_training_sample> data(samples.size()); - unsigned long cnt = 0; - std::vector<double> far_norm, near_norm; - for (unsigned long i = 0; i < data.size(); ++i) - { - far_norm.clear(); - near_norm.clear(); - data[i].far_vects.reserve(samples[i].far_vects.size()); - data[i].near_vects.reserve(samples[i].near_vects.size()); - for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j) - { - data[i].far_vects.push_back(samples[i].anchor_vect - samples[i].far_vects[j]); - if (_use_identity_matrix_prior) - far_norm.push_back(length_squared(data[i].far_vects.back())); - } - for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j) - { - data[i].near_vects.push_back(samples[i].anchor_vect - samples[i].near_vects[j]); - if (_use_identity_matrix_prior) - near_norm.push_back(length_squared(data[i].near_vects.back())); - } - - // Note that this loop only executes if _use_identity_matrix_prior == true. - for (unsigned long j = 0; j < near_norm.size(); ++j) - { - for (unsigned long k = 0; k < far_norm.size(); ++k) - { - bias(cnt++) = 1 - (far_norm[k] - near_norm[j]); - } - } - } - - // Now run the main part of the algorithm - matrix<double,0,0,mem_manager_type> Aminus; - find_max_box_constrained(lbfgs_search_strategy(10), - custom_stop_strategy(C, eps, verbose, max_iter), - objective(data, Aminus, bias), - derivative(num_triples, data, Aminus, bias), - u, 0, C/num_triples); - - - // What we need is the optimal Aminus which is a function of u. So we already - // have what we need and just need to put it into tform. - eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(-Aminus)); - matrix<double,0,1,mem_manager_type> eigs = ed.get_real_eigenvalues(); - // But first, discard the components that are zero to within the machine epsilon. - const double tol = max(eigs)*std::numeric_limits<double>::epsilon(); - for (long i = 0; i < eigs.size(); ++i) - { - if (eigs(i) < tol) - eigs(i) = 0; - } - if (_use_identity_matrix_prior) - tform = matrix_cast<scalar_type>(identity_matrix(Aminus) + diagm(sqrt(eigs))*trans(ed.get_pseudo_v())); - else - tform = matrix_cast<scalar_type>(diagm(sqrt(eigs))*trans(ed.get_pseudo_v())); - - // Pre-apply the transform to m so we don't have to do it inside operator() - // every time it's called. - m = tform*m; - } - - long in_vector_size ( - ) const - { - return m.nr(); - } - - long out_vector_size ( - ) const - { - return m.nr(); - } - - const matrix<scalar_type,0,1,mem_manager_type>& transformed_means ( - ) const - { - return m; - } - - const matrix<scalar_type,0,0,mem_manager_type>& transform ( - ) const - { - return tform; - } - - const result_type& operator() ( - const matrix_type& x - ) const - { - // make sure requires clause is not broken - DLIB_ASSERT(in_vector_size() != 0 && in_vector_size() == x.size() && - is_col_vector(x) == true, - "\tmatrix vector_normalizer_frobmetric::operator()" - << "\n\t you have given invalid arguments to this function" - << "\n\t in_vector_size(): " << in_vector_size() - << "\n\t x.size(): " << x.size() - << "\n\t is_col_vector(x): " << is_col_vector(x) - << "\n\t this: " << this - ); - - temp_out = tform*x-m; - return temp_out; - } - - template <typename mt> - friend void deserialize ( - vector_normalizer_frobmetric<mt>& item, - std::istream& in - ); - - template <typename mt> - friend void serialize ( - const vector_normalizer_frobmetric<mt>& item, - std::ostream& out - ); - - private: - - // ------------------- private data members ------------------- - - matrix_type m; - matrix<scalar_type,0,0,mem_manager_type> tform; - bool verbose; - double eps; - double C; - unsigned long max_iter; - bool _use_identity_matrix_prior; - - // This is just a temporary variable that doesn't contribute to the - // state of this object. - mutable matrix_type temp_out; - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void serialize ( - const vector_normalizer_frobmetric<matrix_type>& item, - std::ostream& out - ) - { - const int version = 2; - serialize(version, out); - - serialize(item.m, out); - serialize(item.tform, out); - serialize(item.verbose, out); - serialize(item.eps, out); - serialize(item.C, out); - serialize(item.max_iter, out); - serialize(item._use_identity_matrix_prior, out); - } - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void deserialize ( - vector_normalizer_frobmetric<matrix_type>& item, - std::istream& in - ) - { - int version = 0; - deserialize(version, in); - if (version != 1 && version != 2) - throw serialization_error("Unsupported version found while deserializing dlib::vector_normalizer_frobmetric."); - - deserialize(item.m, in); - deserialize(item.tform, in); - deserialize(item.verbose, in); - deserialize(item.eps, in); - deserialize(item.C, in); - deserialize(item.max_iter, in); - if (version == 2) - deserialize(item._use_identity_matrix_prior, in); - else - item._use_identity_matrix_prior = false; - } - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_ - diff --git a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h b/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h deleted file mode 100644 index 302628330..000000000 --- a/ml/dlib/dlib/statistics/vector_normalizer_frobmetric_abstract.h +++ /dev/null @@ -1,328 +0,0 @@ -// Copyright (C) 2013 Davis E. King (davis@dlib.net) -// License: Boost Software License See LICENSE.txt for the full license. -#undef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_ -#ifdef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_ - -#include "../matrix.h" - -namespace dlib -{ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - struct frobmetric_training_sample - { - /*! - WHAT THIS OBJECT REPRESENTS - This object represents a training data sample for the - vector_normalizer_frobmetric object. It defines a set of training triplets - relative to a single anchor_vect vector. That is, it specifies that the - learned distance metric should satisfy num_triples() constraints which are, - for all valid i and j: - length(T*anchor_vect-T*near_vects[i]) + 1 < length(T*anchor_vect - T*far_vects[j]) - for some appropriate linear transformation T which will be learned by - vector_normalizer_frobmetric. - !*/ - - matrix_type anchor_vect; - std::vector<matrix_type> near_vects; - std::vector<matrix_type> far_vects; - - unsigned long num_triples ( - ) const { return near_vects.size() * far_vects.size(); } - /*! - ensures - - returns the number of training triplets defined by this object. - !*/ - - void clear() - /*! - ensures - - #near_vects.size() == 0 - - #far_vects.size() == 0 - !*/ - }; - - template < typename matrix_type > - void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out) - template < typename matrix_type > - void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in) - /*! - provides serialisation support. - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - class vector_normalizer_frobmetric - { - /*! - REQUIREMENTS ON matrix_type - - must be a dlib::matrix object capable of representing column - vectors - - INITIAL VALUE - - in_vector_size() == 0 - - out_vector_size() == 0 - - get_epsilon() == 0.1 - - get_c() == 1 - - get_max_iterations() == 5000 - - This object is not verbose - - uses_identity_matrix_prior() == false - - WHAT THIS OBJECT REPRESENTS - This object is a tool for performing the FrobMetric distance metric - learning algorithm described in the following paper: - A Scalable Dual Approach to Semidefinite Metric Learning - By Chunhua Shen, Junae Kim, Lei Wang, in CVPR 2011 - - Therefore, this object is a tool that takes as input training triplets - (anchor_vect, near, far) of vectors and attempts to learn a linear - transformation T such that: - length(T*anchor_vect-T*near) + 1 < length(T*anchor_vect - T*far) - That is, you give a bunch of anchor_vect vectors and for each anchor_vect - you specify some vectors which should be near to it and some that should be - far form it. This object then tries to find a transformation matrix that - makes the "near" vectors close to their anchors while the "far" vectors are - farther away. - - THREAD SAFETY - Note that this object contains a cached matrix object it uses - to store intermediate results for normalization. This avoids - needing to reallocate it every time this object performs normalization - but also makes it non-thread safe. So make sure you don't share - instances of this object between threads. - !*/ - - public: - typedef typename matrix_type::mem_manager_type mem_manager_type; - typedef typename matrix_type::type scalar_type; - typedef matrix_type result_type; - - vector_normalizer_frobmetric ( - ); - /*! - ensures - - this object is properly initialized - !*/ - - bool uses_identity_matrix_prior ( - ) const; - /*! - ensures - - Normally this object will try and find a matrix transform() that - minimizes sum(squared(transform())) but also fits the training data. - However, if #uses_identity_matrix_prior() == true then it will instead - try to find the transformation matrix that minimizes - sum(squared(identity_matrix()-transform())). That is, it will try to - find the matrix most similar to the identity matrix that best fits the - training data. - !*/ - - void set_uses_identity_matrix_prior ( - bool use_prior - ); - /*! - ensures - - #uses_identity_matrix_prior() == use_prior - !*/ - - void be_verbose( - ); - /*! - ensures - - This object will print status messages to standard out so the user can - observe the progress of the train() routine. - !*/ - - void be_quiet ( - ); - /*! - ensures - - this object will not print anything to standard out. - !*/ - - void set_epsilon ( - double eps - ); - /*! - requires - - eps > 0 - ensures - - #get_epsilon() == eps - !*/ - - double get_epsilon ( - ) const; - /*! - ensures - - returns the error epsilon that determines when training should stop. - Smaller values may result in a more accurate solution but take longer to - execute. - !*/ - - void set_c ( - double C - ); - /*! - requires - - C > 0 - ensures - - #set_c() == C - !*/ - - double get_c ( - ) const; - /*! - ensures - - returns the regularization parameter. It is the parameter that - determines the trade-off between trying to fit the training data exactly - or allowing more errors but hopefully improving the generalization of the - resulting distance metric. Larger values encourage exact fitting while - smaller values of C may encourage better generalization. - !*/ - - void set_max_iterations ( - unsigned long max_iterations - ); - /*! - ensures - - #get_max_iterations() == max_iterations - !*/ - - unsigned long get_max_iterations ( - ) const; - /*! - ensures - - The train() routine uses an iterative numerical solver to find the best - distance metric. This function returns the maximum allowable number of - iterations it will use before terminating. Note that typically the - solver terminates prior to the max iteration count limit due to the error - dropping below get_epsilon(). - !*/ - - void train ( - const std::vector<frobmetric_training_sample<matrix_type> >& samples - ); - /*! - requires - - samples.size() != 0 - - All matrices inside samples (i.e. anchors and elements of near_vects and far_vects) - are column vectors with the same non-zero dimension. - - All the vectors in samples contain finite values. - - All elements of samples contain data, specifically, for all valid i: - - samples[i].num_triples() != 0 - ensures - - learns a distance metric from the given training samples. After train - finishes you can use this object's operator() to transform vectors - according to the learned distance metric. In particular, we will have: - - #transform() == The linear transformation learned by the FrobMetric - learning procedure. - - #in_vector_size() == samples[0].anchor_vect.size() - - You can call (*this)(x) to transform a vector according to the learned - distance metric. That is, it should generally be the case that: - - length((*this)(anchor_vect) - (*this)(near)) + 1 < length((*this)(anchor_vect) - (*this)(far)) - for the anchor_vect, near, and far vectors in the training data. - - #transformed_means() == the mean of the input anchor_vect vectors - after being transformed by #transform() - !*/ - - long in_vector_size ( - ) const; - /*! - ensures - - returns the number of rows that input vectors are required to contain if - they are to be normalized by this object. - !*/ - - long out_vector_size ( - ) const; - /*! - ensures - - returns the number of rows in the normalized vectors that come out of - this object. - - The value returned is always in_vector_size(). So out_vector_size() is - just provided to maintain interface consistency with other vector - normalizer objects. That is, the transformations applied by this object - do not change the dimensionality of the vectors. - !*/ - - const matrix<scalar_type,0,1,mem_manager_type>& transformed_means ( - ) const; - /*! - ensures - - returns a column vector V such that: - - V.size() == in_vector_size() - - V is a vector such that subtracting it from transformed vectors - results in them having an expected value of 0. Therefore, it is - equal to transform() times the mean of the input anchor_vect vectors - given to train(). - !*/ - - const matrix<scalar_type,0,0,mem_manager_type>& transform ( - ) const; - /*! - ensures - - returns a copy of the transformation matrix we learned during the last - call to train(). - - The returned matrix is square and has in_vector_size() by in_vector_size() - dimensions. - !*/ - - const result_type& operator() ( - const matrix_type& x - ) const; - /*! - requires - - in_vector_size() != 0 - - in_vector_size() == x.size() - - is_col_vector(x) == true - ensures - - returns a normalized version of x, call it Z, that has the following - properties: - - Z == The result of applying the linear transform we learned during - train() to the input vector x. - - Z == transform()*x-transformed_means() - - is_col_vector(Z) == true - - Z.size() == x.size() - - The expected value of each element of Z is 0. - !*/ - }; - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void serialize ( - const vector_normalizer_frobmetric<matrix_type>& item, - std::ostream& out - ); - /*! - provides serialization support - !*/ - -// ---------------------------------------------------------------------------------------- - - template < - typename matrix_type - > - void deserialize ( - vector_normalizer_frobmetric<matrix_type>& item, - std::istream& in - ); - /*! - provides deserialization support - !*/ - -// ---------------------------------------------------------------------------------------- - -} - -#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_ABSTRACT_Hh_ - |