summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/statistics/dpca.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/statistics/dpca.h')
-rw-r--r--ml/dlib/dlib/statistics/dpca.h541
1 files changed, 0 insertions, 541 deletions
diff --git a/ml/dlib/dlib/statistics/dpca.h b/ml/dlib/dlib/statistics/dpca.h
deleted file mode 100644
index cae784682..000000000
--- a/ml/dlib/dlib/statistics/dpca.h
+++ /dev/null
@@ -1,541 +0,0 @@
-// Copyright (C) 2009 Davis E. King (davis@dlib.net)
-// License: Boost Software License See LICENSE.txt for the full license.
-#ifndef DLIB_DPCA_h_
-#define DLIB_DPCA_h_
-
-#include "dpca_abstract.h"
-#include <limits>
-#include <cmath>
-#include "../algs.h"
-#include "../matrix.h"
-#include <iostream>
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
- template <
- typename matrix_type
- >
- class discriminant_pca
- {
- /*!
- INITIAL VALUE
- - vect_size == 0
- - total_count == 0
- - between_count == 0
- - within_count == 0
- - between_weight == 1
- - within_weight == 1
-
- CONVENTION
- - vect_size == in_vector_size()
- - total_count == the number of times add_to_total_variance() has been called.
- - within_count == the number of times add_to_within_class_variance() has been called.
- - between_count == the number of times add_to_between_class_variance() has been called.
- - between_weight == between_class_weight()
- - within_weight == within_class_weight()
-
- - if (total_count != 0)
- - total_sum == the sum of all vectors given to add_to_total_variance()
- - the covariance of all the elements given to add_to_total_variance() is given
- by:
- - let avg == total_sum/total_count
- - covariance == total_cov/total_count - avg*trans(avg)
- - if (within_count != 0)
- - within_cov/within_count == the normalized within class scatter matrix
- - if (between_count != 0)
- - between_cov/between_count == the normalized between class scatter matrix
- !*/
-
- public:
-
- struct discriminant_pca_error : public error
- {
- discriminant_pca_error(const std::string& message): error(message) {}
- };
-
- typedef typename matrix_type::mem_manager_type mem_manager_type;
- typedef typename matrix_type::type scalar_type;
- typedef typename matrix_type::layout_type layout_type;
- typedef matrix<scalar_type,0,0,mem_manager_type,layout_type> general_matrix;
- typedef matrix<scalar_type,0,1,mem_manager_type,layout_type> column_matrix;
-
- discriminant_pca (
- )
- {
- clear();
- }
-
- void clear(
- )
- {
- total_count = 0;
- between_count = 0;
- within_count = 0;
-
- vect_size = 0;
-
-
- between_weight = 1;
- within_weight = 1;
-
-
- total_sum.set_size(0);
- between_cov.set_size(0,0);
- total_cov.set_size(0,0);
- within_cov.set_size(0,0);
- }
-
- long in_vector_size (
- ) const
- {
- return vect_size;
- }
-
- void set_within_class_weight (
- scalar_type weight
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(weight >= 0,
- "\t void discriminant_pca::set_within_class_weight()"
- << "\n\t You can't use negative weight values"
- << "\n\t weight: " << weight
- << "\n\t this: " << this
- );
-
- within_weight = weight;
- }
-
- scalar_type within_class_weight (
- ) const
- {
- return within_weight;
- }
-
- void set_between_class_weight (
- scalar_type weight
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(weight >= 0,
- "\t void discriminant_pca::set_between_class_weight()"
- << "\n\t You can't use negative weight values"
- << "\n\t weight: " << weight
- << "\n\t this: " << this
- );
-
- between_weight = weight;
- }
-
- scalar_type between_class_weight (
- ) const
- {
- return between_weight;
- }
-
- template <typename EXP1, typename EXP2>
- void add_to_within_class_variance(
- const matrix_exp<EXP1>& x,
- const matrix_exp<EXP2>& y
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) &&
- x.size() == y.size() &&
- (in_vector_size() == 0 || x.size() == in_vector_size()),
- "\t void discriminant_pca::add_to_within_class_variance()"
- << "\n\t Invalid inputs were given to this function"
- << "\n\t is_col_vector(x): " << is_col_vector(x)
- << "\n\t is_col_vector(y): " << is_col_vector(y)
- << "\n\t x.size(): " << x.size()
- << "\n\t y.size(): " << y.size()
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t this: " << this
- );
-
- vect_size = x.size();
- if (within_count == 0)
- {
- within_cov = (x-y)*trans(x-y);
- }
- else
- {
- within_cov += (x-y)*trans(x-y);
- }
- ++within_count;
- }
-
- template <typename EXP1, typename EXP2>
- void add_to_between_class_variance(
- const matrix_exp<EXP1>& x,
- const matrix_exp<EXP2>& y
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(is_col_vector(x) && is_col_vector(y) &&
- x.size() == y.size() &&
- (in_vector_size() == 0 || x.size() == in_vector_size()),
- "\t void discriminant_pca::add_to_between_class_variance()"
- << "\n\t Invalid inputs were given to this function"
- << "\n\t is_col_vector(x): " << is_col_vector(x)
- << "\n\t is_col_vector(y): " << is_col_vector(y)
- << "\n\t x.size(): " << x.size()
- << "\n\t y.size(): " << y.size()
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t this: " << this
- );
-
- vect_size = x.size();
- if (between_count == 0)
- {
- between_cov = (x-y)*trans(x-y);
- }
- else
- {
- between_cov += (x-y)*trans(x-y);
- }
- ++between_count;
- }
-
- template <typename EXP>
- void add_to_total_variance(
- const matrix_exp<EXP>& x
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(is_col_vector(x) && (in_vector_size() == 0 || x.size() == in_vector_size()),
- "\t void discriminant_pca::add_to_total_variance()"
- << "\n\t Invalid inputs were given to this function"
- << "\n\t is_col_vector(x): " << is_col_vector(x)
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t x.size(): " << x.size()
- << "\n\t this: " << this
- );
-
- vect_size = x.size();
- if (total_count == 0)
- {
- total_cov = x*trans(x);
- total_sum = x;
- }
- else
- {
- total_cov += x*trans(x);
- total_sum += x;
- }
- ++total_count;
- }
-
- const general_matrix dpca_matrix (
- const double eps = 0.99
- ) const
- {
- general_matrix dpca_mat;
- general_matrix eigenvalues;
- dpca_matrix(dpca_mat, eigenvalues, eps);
- return dpca_mat;
- }
-
- const general_matrix dpca_matrix_of_size (
- const long num_rows
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
- "\t general_matrix discriminant_pca::dpca_matrix_of_size()"
- << "\n\t Invalid inputs were given to this function"
- << "\n\t num_rows: " << num_rows
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t this: " << this
- );
-
- general_matrix dpca_mat;
- general_matrix eigenvalues;
- dpca_matrix_of_size(dpca_mat, eigenvalues, num_rows);
- return dpca_mat;
- }
-
- void dpca_matrix (
- general_matrix& dpca_mat,
- general_matrix& eigenvalues,
- const double eps = 0.99
- ) const
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(0 < eps && eps <= 1 && in_vector_size() != 0,
- "\t void discriminant_pca::dpca_matrix()"
- << "\n\t Invalid inputs were given to this function"
- << "\n\t eps: " << eps
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t this: " << this
- );
-
- compute_dpca_matrix(dpca_mat, eigenvalues, eps, 0);
- }
-
- void dpca_matrix_of_size (
- general_matrix& dpca_mat,
- general_matrix& eigenvalues,
- const long num_rows
- )
- {
- // make sure requires clause is not broken
- DLIB_ASSERT(0 < num_rows && num_rows <= in_vector_size(),
- "\t general_matrix discriminant_pca::dpca_matrix_of_size()"
- << "\n\t Invalid inputs were given to this function"
- << "\n\t num_rows: " << num_rows
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t this: " << this
- );
-
- compute_dpca_matrix(dpca_mat, eigenvalues, 1, num_rows);
- }
-
- void swap (
- discriminant_pca& item
- )
- {
- using std::swap;
- swap(total_cov, item.total_cov);
- swap(total_sum, item.total_sum);
- swap(total_count, item.total_count);
- swap(vect_size, item.vect_size);
- swap(between_cov, item.between_cov);
-
- swap(between_count, item.between_count);
- swap(between_weight, item.between_weight);
- swap(within_cov, item.within_cov);
- swap(within_count, item.within_count);
- swap(within_weight, item.within_weight);
- }
-
- friend void deserialize (
- discriminant_pca& item,
- std::istream& in
- )
- {
- deserialize( item.total_cov, in);
- deserialize( item.total_sum, in);
- deserialize( item.total_count, in);
- deserialize( item.vect_size, in);
- deserialize( item.between_cov, in);
- deserialize( item.between_count, in);
- deserialize( item.between_weight, in);
- deserialize( item.within_cov, in);
- deserialize( item.within_count, in);
- deserialize( item.within_weight, in);
- }
-
- friend void serialize (
- const discriminant_pca& item,
- std::ostream& out
- )
- {
- serialize( item.total_cov, out);
- serialize( item.total_sum, out);
- serialize( item.total_count, out);
- serialize( item.vect_size, out);
- serialize( item.between_cov, out);
- serialize( item.between_count, out);
- serialize( item.between_weight, out);
- serialize( item.within_cov, out);
- serialize( item.within_count, out);
- serialize( item.within_weight, out);
- }
-
- discriminant_pca operator+ (
- const discriminant_pca& item
- ) const
- {
- // make sure requires clause is not broken
- DLIB_ASSERT((in_vector_size() == 0 || item.in_vector_size() == 0 || in_vector_size() == item.in_vector_size()) &&
- between_class_weight() == item.between_class_weight() &&
- within_class_weight() == item.within_class_weight(),
- "\t discriminant_pca discriminant_pca::operator+()"
- << "\n\t The two discriminant_pca objects being added must have compatible parameters"
- << "\n\t in_vector_size(): " << in_vector_size()
- << "\n\t item.in_vector_size(): " << item.in_vector_size()
- << "\n\t between_class_weight(): " << between_class_weight()
- << "\n\t item.between_class_weight(): " << item.between_class_weight()
- << "\n\t within_class_weight(): " << within_class_weight()
- << "\n\t item.within_class_weight(): " << item.within_class_weight()
- << "\n\t this: " << this
- );
-
- discriminant_pca temp(item);
-
- // We need to make sure to ignore empty matrices. That's what these if statements
- // are for.
-
- if (total_count != 0 && temp.total_count != 0)
- {
- temp.total_cov += total_cov;
- temp.total_sum += total_sum;
- temp.total_count += total_count;
- }
- else if (total_count != 0)
- {
- temp.total_cov = total_cov;
- temp.total_sum = total_sum;
- temp.total_count = total_count;
- }
-
- if (between_count != 0 && temp.between_count != 0)
- {
- temp.between_cov += between_cov;
- temp.between_count += between_count;
- }
- else if (between_count != 0)
- {
- temp.between_cov = between_cov;
- temp.between_count = between_count;
- }
-
- if (within_count != 0 && temp.within_count != 0)
- {
- temp.within_cov += within_cov;
- temp.within_count += within_count;
- }
- else if (within_count != 0)
- {
- temp.within_cov = within_cov;
- temp.within_count = within_count;
- }
-
- return temp;
- }
-
- discriminant_pca& operator+= (
- const discriminant_pca& rhs
- )
- {
- (*this + rhs).swap(*this);
- return *this;
- }
-
- private:
-
- void compute_dpca_matrix (
- general_matrix& dpca_mat,
- general_matrix& eigenvalues,
- const double eps,
- long num_rows
- ) const
- {
- general_matrix cov;
-
- // now combine the three measures of variance into a single matrix by using the
- // within_weight and between_weight weights.
- cov = get_total_covariance_matrix();
- if (within_count != 0)
- cov -= within_weight*within_cov/within_count;
- if (between_count != 0)
- cov += between_weight*between_cov/between_count;
-
-
- eigenvalue_decomposition<general_matrix> eig(make_symmetric(cov));
-
- eigenvalues = eig.get_real_eigenvalues();
- dpca_mat = eig.get_pseudo_v();
-
- // sort the eigenvalues and eigenvectors so that the biggest eigenvalues come first
- rsort_columns(dpca_mat, eigenvalues);
-
- long num_vectors = 0;
- if (num_rows == 0)
- {
- // Some of the eigenvalues might be negative. So first lets zero those out
- // so they won't get considered.
- eigenvalues = pointwise_multiply(eigenvalues > 0, eigenvalues);
- // figure out how many eigenvectors we want in our dpca matrix
- const double thresh = sum(eigenvalues)*eps;
- double total = 0;
- for (long r = 0; r < eigenvalues.size() && total < thresh; ++r)
- {
- // Don't even think about looking at eigenvalues that are 0. If we go this
- // far then we have all we need.
- if (eigenvalues(r) == 0)
- break;
-
- ++num_vectors;
- total += eigenvalues(r);
- }
-
- if (num_vectors == 0)
- throw discriminant_pca_error("While performing discriminant_pca, all eigenvalues were negative or 0");
- }
- else
- {
- num_vectors = num_rows;
- }
-
-
- // So now we know we want to use num_vectors of the first eigenvectors. So
- // pull those out and discard the rest.
- dpca_mat = trans(colm(dpca_mat,range(0,num_vectors-1)));
-
- // also clip off the eigenvalues we aren't using
- eigenvalues = rowm(eigenvalues, range(0,num_vectors-1));
-
- }
-
- general_matrix get_total_covariance_matrix (
- ) const
- /*!
- ensures
- - returns the covariance matrix of all the data given to the add_to_total_variance()
- !*/
- {
- // if we don't even know the dimensionality of the vectors we are dealing
- // with then just return an empty matrix
- if (vect_size == 0)
- return general_matrix();
-
- // we know the vector size but we have zero total covariance.
- if (total_count == 0)
- {
- general_matrix temp(vect_size,vect_size);
- temp = 0;
- return temp;
- }
-
- // In this case we actually have something to make a total covariance matrix out of.
- // So do that.
- column_matrix avg = total_sum/total_count;
-
- return total_cov/total_count - avg*trans(avg);
- }
-
- general_matrix total_cov;
- column_matrix total_sum;
- scalar_type total_count;
-
- long vect_size;
-
- general_matrix between_cov;
- scalar_type between_count;
- scalar_type between_weight;
-
- general_matrix within_cov;
- scalar_type within_count;
- scalar_type within_weight;
- };
-
- template <
- typename matrix_type
- >
- inline void swap (
- discriminant_pca<matrix_type>& a,
- discriminant_pca<matrix_type>& b
- ) { a.swap(b); }
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_DPCA_h_
-
-