summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h
diff options
context:
space:
mode:
Diffstat (limited to 'ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h')
-rw-r--r--ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h327
1 files changed, 327 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h b/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h
new file mode 100644
index 000000000..3224f9a0a
--- /dev/null
+++ b/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h
@@ -0,0 +1,327 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_LISf_ABSTRACT_
+#ifdef DLIB_LISf_ABSTRACT_
+
+#include "../algs.h"
+#include "../serialize.h"
+#include "kernel_abstract.h"
+
+namespace dlib
+{
+
+ template <
+ typename kernel_type
+ >
+ class linearly_independent_subset_finder
+ {
+ /*!
+ REQUIREMENTS ON kernel_type
+ is a kernel function object as defined in dlib/svm/kernel_abstract.h
+
+ INITIAL VALUE
+ - size() == 0
+
+ WHAT THIS OBJECT REPRESENTS
+ This is an implementation of an online algorithm for recursively finding a
+ set (aka dictionary) of linearly independent vectors in a kernel induced
+ feature space. To use it you decide how large you would like the dictionary
+ to be and then you feed it sample points.
+
+ The implementation uses the Approximately Linearly Dependent metric described
+ in the paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel to
+ decide which points are more linearly independent than others. The metric is
+ simply the squared distance between a test point and the subspace spanned by
+ the set of dictionary vectors.
+
+ Each time you present this object with a new sample point (via this->add())
+ it calculates the projection distance and if it is sufficiently large then this
+ new point is included into the dictionary. Note that this object can be configured
+ to have a maximum size. Once the max dictionary size is reached each new point
+ kicks out a previous point. This is done by removing the dictionary vector that
+ has the smallest projection distance onto the others. That is, the "least linearly
+ independent" vector is removed to make room for the new one.
+ !*/
+
+ public:
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::sample_type type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+
+ linearly_independent_subset_finder (
+ );
+ /*!
+ ensures
+ - #minimum_tolerance() == 0.001
+ - this object is properly initialized
+ - #get_kernel() == kernel_type() (i.e. whatever the default is for the supplied kernel)
+ - #max_dictionary_size() == 100
+ !*/
+
+ linearly_independent_subset_finder (
+ const kernel_type& kernel_,
+ unsigned long max_dictionary_size_,
+ scalar_type min_tolerance = 0.001
+ );
+ /*!
+ requires
+ - min_tolerance > 0
+ - max_dictionary_size > 1
+ ensures
+ - #minimum_tolerance() == min_tolerance
+ - this object is properly initialized
+ - #get_kernel() == kernel_
+ - #max_dictionary_size() == max_dictionary_size_
+ !*/
+
+ const kernel_type& get_kernel (
+ ) const;
+ /*!
+ ensures
+ - returns a const reference to the kernel used by this object
+ !*/
+
+ unsigned long max_dictionary_size(
+ ) const;
+ /*!
+ ensures
+ - returns the maximum number of dictionary vectors this object
+ will accumulate. That is, size() will never be
+ greater than max_dictionary_size().
+ !*/
+
+ scalar_type minimum_tolerance(
+ ) const;
+ /*!
+ ensures
+ - returns the minimum projection error necessary to include a sample point
+ into the dictionary.
+ !*/
+
+ void set_minimum_tolerance (
+ scalar_type min_tolerance
+ );
+ /*!
+ requires
+ - min_tolerance > 0
+ ensures
+ - #minimum_tolerance() == min_tolerance
+ !*/
+
+ void clear_dictionary (
+ );
+ /*!
+ ensures
+ - clears out all the data (e.g. #size() == 0)
+ !*/
+
+ bool add (
+ const sample_type& x
+ );
+ /*!
+ ensures
+ - if (size() < max_dictionary_size() then
+ - if (projection_error(x) > minimum_tolerance()) then
+ - adds x into the dictionary
+ - (*this)[#size()-1] == x
+ - #size() == size() + 1
+ - returns true
+ - else
+ - the dictionary is not changed
+ - returns false
+ - else
+ - #size() == size()
+ (i.e. the number of vectors in this object doesn't change)
+ - since the dictionary is full adding a new element means we have to
+ remove one of the current ones. So let proj_error[i] be equal to the
+ projection error obtained when projecting dictionary vector (*this)[i]
+ onto the other elements of the dictionary. Then let min_proj_error
+ be equal to the minimum value in proj_error. The dictionary element
+ with the minimum projection error is the "least linearly independent"
+ vector in the dictionary and is the one which will be removed to make
+ room for a new element.
+ - if (projection_error(x) > minimum_tolerance() && projection_error(x) > min_proj_error)
+ - the least linearly independent vector in this object is removed
+ - adds x into the dictionary
+ - (*this)[#size()-1] == x
+ - returns true
+ - else
+ - the dictionary is not changed
+ - returns false
+ !*/
+
+ scalar_type projection_error (
+ const sample_type& x
+ ) const;
+ /*!
+ ensures
+ - returns the squared distance between x and the subspace spanned by
+ the set of dictionary vectors. (e.g. this is the same number that
+ gets returned by the empirical_kernel_map::project() function's
+ projection_error argument when the ekm is loaded with the dictionary
+ vectors.)
+ - Note that if the dictionary is empty then the return value is
+ equal to get_kernel()(x,x).
+ !*/
+
+ void swap (
+ linearly_independent_subset_finder& item
+ );
+ /*!
+ ensures
+ - swaps *this with item
+ !*/
+
+ size_t size (
+ ) const;
+ /*!
+ ensures
+ - returns the number of vectors in the dictionary.
+ !*/
+
+ const sample_type& operator[] (
+ unsigned long index
+ ) const;
+ /*!
+ requires
+ - index < size()
+ ensures
+ - returns the index'th element in the set of linearly independent
+ vectors contained in this object.
+ !*/
+
+ const matrix<sample_type,0,1,mem_manager_type> get_dictionary (
+ ) const;
+ /*!
+ ensures
+ - returns a column vector that contains all the dictionary
+ vectors in this object.
+ !*/
+
+ const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix (
+ ) const;
+ /*!
+ ensures
+ - returns a matrix K such that:
+ - K.nr() == K.nc() == size()
+ - K == kernel_matrix(get_kernel(), get_dictionary())
+ i.e. K == the kernel matrix for the dictionary vectors
+ !*/
+
+ const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix (
+ ) const;
+ /*!
+ ensures
+ - if (size() != 0)
+ - returns inv(get_kernel_matrix())
+ - else
+ - returns an empty matrix
+ !*/
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename kernel_type
+ >
+ void swap(
+ linearly_independent_subset_finder<kernel_type>& a,
+ linearly_independent_subset_finder<kernel_type>& b
+ ) { a.swap(b); }
+ /*!
+ provides a global swap function
+ !*/
+
+ template <
+ typename kernel_type
+ >
+ void serialize (
+ const linearly_independent_subset_finder<kernel_type>& item,
+ std::ostream& out
+ );
+ /*!
+ provides serialization support for linearly_independent_subset_finder objects
+ !*/
+
+ template <
+ typename kernel_type
+ >
+ void deserialize (
+ linearly_independent_subset_finder<kernel_type>& item,
+ std::istream& in
+ );
+ /*!
+ provides serialization support for linearly_independent_subset_finder objects
+ !*/
+
+ template <
+ typename T
+ >
+ const matrix_exp mat (
+ const linearly_independent_subset_finder<T>& m
+ );
+ /*!
+ ensures
+ - converts m into a matrix
+ - returns a matrix R such that:
+ - is_col_vector(R) == true
+ - R.size() == m.size()
+ - for all valid r:
+ R(r) == m[r]
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename kernel_type,
+ typename vector_type,
+ typename rand_type
+ >
+ void fill_lisf (
+ linearly_independent_subset_finder<kernel_type>& lisf,
+ const vector_type& samples,
+ rand_type& rnd,
+ int sampling_size = 2000
+ );
+ /*!
+ requires
+ - vector_type == a dlib::matrix or something convertible to one via
+ mat()
+ - is_vector(mat(samples)) == true
+ - rand_type == an implementation of rand/rand_kernel_abstract.h or a type
+ convertible to a string via cast_to_string()
+ - sampling_size > 0
+ ensures
+ - The purpose of this function is to fill lisf with points from samples. It does
+ this by randomly sampling elements of samples until no more can be added. The
+ precise stopping condition is when sampling_size additions to lisf have failed
+ or the max dictionary size has been reached.
+ - This function employs a random number generator. If rand_type is a random
+ number generator then it uses the instance given. Otherwise it uses cast_to_string(rnd)
+ to seed a new random number generator.
+ !*/
+
+ template <
+ typename kernel_type,
+ typename vector_type
+ >
+ void fill_lisf (
+ linearly_independent_subset_finder<kernel_type>& lisf,
+ const vector_type& samples
+ );
+ /*!
+ requires
+ - vector_type == a dlib::matrix or something convertible to one via
+ mat()
+ - is_vector(mat(samples)) == true
+ ensures
+ - performs fill_lisf(lisf, samples, default_rand_generator, 2000)
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_LISf_ABSTRACT_
+