summaryrefslogtreecommitdiffstats
path: root/ml/dlib/dlib/svm/kcentroid.h
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:19:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-03-09 13:20:02 +0000
commit58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/svm/kcentroid.h
parentReleasing debian version 1.43.2-1. (diff)
downloadnetdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz
netdata-58daab21cd043e1dc37024a7f99b396788372918.zip
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/svm/kcentroid.h')
-rw-r--r--ml/dlib/dlib/svm/kcentroid.h614
1 file changed, 614 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/kcentroid.h b/ml/dlib/dlib/svm/kcentroid.h
new file mode 100644
index 000000000..5f380486a
--- /dev/null
+++ b/ml/dlib/dlib/svm/kcentroid.h
@@ -0,0 +1,614 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#ifndef DLIB_KCENTROId_
+#define DLIB_KCENTROId_
+
+#include <vector>
+
+#include "kcentroid_abstract.h"
+#include "../matrix.h"
+#include "function.h"
+#include "../std_allocator.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename kernel_type>
+ class kcentroid
+ {
+ /*!
+ This object represents a weighted sum of sample points in a kernel induced
+ feature space. It can be used to kernelize any algorithm that requires only
+ the ability to perform vector addition, subtraction, scalar multiplication,
+ and inner products. It uses the sparsification technique described in the
+ paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel.
+
+ To understand the code it would also be useful to consult page 114 of the book
+ Kernel Methods for Pattern Analysis by Taylor and Cristianini as well as page 554
+ (particularly equation 18.31) of the book Learning with Kernels by Scholkopf and
+ Smola. Everything you really need to know is in the Engel paper. But the other
+ books help give more perspective on the issues involved.
+
+
+ INITIAL VALUE
+ - min_strength == 0
+ - min_vect_idx == 0
+ - K_inv.size() == 0
+ - K.size() == 0
+ - dictionary.size() == 0
+ - bias == 0
+ - bias_is_stale == false
+
+ CONVENTION
+ - max_dictionary_size() == my_max_dictionary_size
+ - get_kernel() == kernel
+
+ - K.nr() == dictionary.size()
+ - K.nc() == dictionary.size()
+ - for all valid r,c:
+ - K(r,c) == kernel(dictionary[r], dictionary[c])
+ - K_inv == inv(K)
+
+ - if (dictionary.size() == my_max_dictionary_size && my_remove_oldest_first == false) then
+ - for all valid 0 < i < dictionary.size():
+ - Let STRENGTHS[i] == the delta you would get for dictionary[i] (i.e. Approximately
+ Linearly Dependent value) if you removed dictionary[i] from this object and then
+ tried to add it back in.
+ - min_strength == the minimum value from STRENGTHS
+ - min_vect_idx == the index of the element in STRENGTHS with the smallest value
+
+ !*/
+
+ public:
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+
+ kcentroid (
+ ) :
+ my_remove_oldest_first(false),
+ my_tolerance(0.001),
+ my_max_dictionary_size(1000000),
+ bias(0),
+ bias_is_stale(false)
+ {
+ clear_dictionary();
+ }
+
+ explicit kcentroid (
+ const kernel_type& kernel_,
+ scalar_type tolerance_ = 0.001,
+ unsigned long max_dictionary_size_ = 1000000,
+ bool remove_oldest_first_ = false
+ ) :
+ my_remove_oldest_first(remove_oldest_first_),
+ kernel(kernel_),
+ my_tolerance(tolerance_),
+ my_max_dictionary_size(max_dictionary_size_),
+ bias(0),
+ bias_is_stale(false)
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(tolerance_ > 0 && max_dictionary_size_ > 1,
+ "\tkcentroid::kcentroid()"
+ << "\n\t You have to give a positive tolerance"
+ << "\n\t this: " << this
+ << "\n\t tolerance_: " << tolerance_
+ << "\n\t max_dictionary_size_: " << max_dictionary_size_
+ );
+
+ clear_dictionary();
+ }
+
+ scalar_type tolerance() const
+ {
+ return my_tolerance;
+ }
+
+ unsigned long max_dictionary_size() const
+ {
+ return my_max_dictionary_size;
+ }
+
+ bool remove_oldest_first (
+ ) const
+ {
+ return my_remove_oldest_first;
+ }
+
+ const kernel_type& get_kernel (
+ ) const
+ {
+ return kernel;
+ }
+
+ void clear_dictionary ()
+ {
+ dictionary.clear();
+ alpha.clear();
+
+ min_strength = 0;
+ min_vect_idx = 0;
+ K_inv.set_size(0,0);
+ K.set_size(0,0);
+ samples_seen = 0;
+ bias = 0;
+ bias_is_stale = false;
+ }
+
+ scalar_type operator() (
+ const kcentroid& x
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(x.get_kernel() == get_kernel(),
+ "\tscalar_type kcentroid::operator()(const kcentroid& x)"
+ << "\n\tYou can only compare two kcentroid objects if they use the same kernel"
+ << "\n\tthis: " << this
+ );
+
+ // make sure the bias terms are up to date
+ refresh_bias();
+ x.refresh_bias();
+
+ scalar_type temp = x.bias + bias - 2*inner_product(x);
+
+ if (temp > 0)
+ return std::sqrt(temp);
+ else
+ return 0;
+ }
+
+ scalar_type inner_product (
+ const sample_type& x
+ ) const
+ {
+ scalar_type temp = 0;
+ for (unsigned long i = 0; i < alpha.size(); ++i)
+ temp += alpha[i]*kernel(dictionary[i], x);
+ return temp;
+ }
+
+ scalar_type inner_product (
+ const kcentroid& x
+ ) const
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(x.get_kernel() == get_kernel(),
+ "\tscalar_type kcentroid::inner_product(const kcentroid& x)"
+ << "\n\tYou can only compare two kcentroid objects if they use the same kernel"
+ << "\n\tthis: " << this
+ );
+
+ scalar_type temp = 0;
+ for (unsigned long i = 0; i < alpha.size(); ++i)
+ {
+ for (unsigned long j = 0; j < x.alpha.size(); ++j)
+ {
+ temp += alpha[i]*x.alpha[j]*kernel(dictionary[i], x.dictionary[j]);
+ }
+ }
+ return temp;
+ }
+
+ scalar_type squared_norm (
+ ) const
+ {
+ refresh_bias();
+ return bias;
+ }
+
+ scalar_type operator() (
+ const sample_type& x
+ ) const
+ {
+ // make sure the bias terms are up to date
+ refresh_bias();
+
+ const scalar_type kxx = kernel(x,x);
+
+ scalar_type temp = kxx + bias - 2*inner_product(x);
+ if (temp > 0)
+ return std::sqrt(temp);
+ else
+ return 0;
+ }
+
+ scalar_type samples_trained (
+ ) const
+ {
+ return samples_seen;
+ }
+
+ scalar_type test_and_train (
+ const sample_type& x
+ )
+ {
+ ++samples_seen;
+ const scalar_type xscale = 1/samples_seen;
+ const scalar_type cscale = 1-xscale;
+ return train_and_maybe_test(x,cscale,xscale,true);
+ }
+
+ void train (
+ const sample_type& x
+ )
+ {
+ ++samples_seen;
+ const scalar_type xscale = 1/samples_seen;
+ const scalar_type cscale = 1-xscale;
+ train_and_maybe_test(x,cscale,xscale,false);
+ }
+
+ scalar_type test_and_train (
+ const sample_type& x,
+ scalar_type cscale,
+ scalar_type xscale
+ )
+ {
+ ++samples_seen;
+ return train_and_maybe_test(x,cscale,xscale,true);
+ }
+
+ void scale_by (
+ scalar_type cscale
+ )
+ {
+ for (unsigned long i = 0; i < alpha.size(); ++i)
+ {
+ alpha[i] = cscale*alpha[i];
+ }
+ }
+
+ void train (
+ const sample_type& x,
+ scalar_type cscale,
+ scalar_type xscale
+ )
+ {
+ ++samples_seen;
+ train_and_maybe_test(x,cscale,xscale,false);
+ }
+
+ void swap (
+ kcentroid& item
+ )
+ {
+ exchange(min_strength, item.min_strength);
+ exchange(min_vect_idx, item.min_vect_idx);
+ exchange(my_remove_oldest_first, item.my_remove_oldest_first);
+
+ exchange(kernel, item.kernel);
+ dictionary.swap(item.dictionary);
+ alpha.swap(item.alpha);
+ K_inv.swap(item.K_inv);
+ K.swap(item.K);
+ exchange(my_tolerance, item.my_tolerance);
+ exchange(samples_seen, item.samples_seen);
+ exchange(bias, item.bias);
+ a.swap(item.a);
+ k.swap(item.k);
+ exchange(bias_is_stale, item.bias_is_stale);
+ exchange(my_max_dictionary_size, item.my_max_dictionary_size);
+ }
+
+ unsigned long dictionary_size (
+ ) const { return dictionary.size(); }
+
+ friend void serialize(const kcentroid& item, std::ostream& out)
+ {
+ serialize(item.min_strength, out);
+ serialize(item.min_vect_idx, out);
+ serialize(item.my_remove_oldest_first, out);
+
+ serialize(item.kernel, out);
+ serialize(item.dictionary, out);
+ serialize(item.alpha, out);
+ serialize(item.K_inv, out);
+ serialize(item.K, out);
+ serialize(item.my_tolerance, out);
+ serialize(item.samples_seen, out);
+ serialize(item.bias, out);
+ serialize(item.bias_is_stale, out);
+ serialize(item.my_max_dictionary_size, out);
+ }
+
+ friend void deserialize(kcentroid& item, std::istream& in)
+ {
+ deserialize(item.min_strength, in);
+ deserialize(item.min_vect_idx, in);
+ deserialize(item.my_remove_oldest_first, in);
+
+ deserialize(item.kernel, in);
+ deserialize(item.dictionary, in);
+ deserialize(item.alpha, in);
+ deserialize(item.K_inv, in);
+ deserialize(item.K, in);
+ deserialize(item.my_tolerance, in);
+ deserialize(item.samples_seen, in);
+ deserialize(item.bias, in);
+ deserialize(item.bias_is_stale, in);
+ deserialize(item.my_max_dictionary_size, in);
+ }
+
+ distance_function<kernel_type> get_distance_function (
+ ) const
+ {
+ refresh_bias();
+ return distance_function<kernel_type>(mat(alpha),
+ bias,
+ kernel,
+ mat(dictionary));
+ }
+
+ private:
+
+ void refresh_bias (
+ ) const
+ {
+ if (bias_is_stale)
+ {
+ bias_is_stale = false;
+ // recompute the bias term
+ bias = sum(pointwise_multiply(K, mat(alpha)*trans(mat(alpha))));
+ }
+ }
+
+ scalar_type train_and_maybe_test (
+ const sample_type& x,
+ scalar_type cscale,
+ scalar_type xscale,
+ bool do_test
+ )
+ {
+ scalar_type test_result = 0;
+ const scalar_type kx = kernel(x,x);
+ if (alpha.size() == 0)
+ {
+ // just ignore this sample if it is the zero vector (or really close to being zero)
+ if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon())
+ {
+ // set initial state since this is the first training example we have seen
+
+ K_inv.set_size(1,1);
+ K_inv(0,0) = 1/kx;
+ K.set_size(1,1);
+ K(0,0) = kx;
+
+ alpha.push_back(xscale);
+ dictionary.push_back(x);
+ }
+ else
+ {
+ // the distance from an empty kcentroid and the zero vector is zero by definition.
+ return 0;
+ }
+ }
+ else
+ {
+ // fill in k
+ k.set_size(alpha.size());
+ for (long r = 0; r < k.nr(); ++r)
+ k(r) = kernel(x,dictionary[r]);
+
+ if (do_test)
+ {
+ refresh_bias();
+ test_result = std::sqrt(kx + bias - 2*trans(mat(alpha))*k);
+ }
+
+ // compute the error we would have if we approximated the new x sample
+ // with the dictionary. That is, do the ALD test from the KRLS paper.
+ a = K_inv*k;
+ scalar_type delta = kx - trans(k)*a;
+
+ // if this new vector isn't approximately linearly dependent on the vectors
+ // in our dictionary.
+ if (delta > min_strength && delta > my_tolerance)
+ {
+ bool need_to_update_min_strength = false;
+ if (dictionary.size() >= my_max_dictionary_size)
+ {
+ // We need to remove one of the old members of the dictionary before
+ // we proceed with adding a new one.
+ long idx_to_remove;
+ if (my_remove_oldest_first)
+ {
+ // remove the oldest one
+ idx_to_remove = 0;
+ }
+ else
+ {
+ // if we have never computed the min_strength then we should compute it
+ if (min_strength == 0)
+ recompute_min_strength();
+
+ // select the dictionary vector that is most linearly dependent for removal
+ idx_to_remove = min_vect_idx;
+ need_to_update_min_strength = true;
+ }
+
+ remove_dictionary_vector(idx_to_remove);
+
+ // recompute these guys since they were computed with the old
+ // kernel matrix
+ k = remove_row(k,idx_to_remove);
+ a = K_inv*k;
+ delta = kx - trans(k)*a;
+ }
+
+ // add x to the dictionary
+ dictionary.push_back(x);
+
+
+ // update K_inv by computing the new one in the temp matrix (equation 3.14)
+ matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1);
+ // update the middle part of the matrix
+ set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta;
+ // update the right column of the matrix
+ set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta;
+ // update the bottom row of the matrix
+ set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta);
+ // update the bottom right corner of the matrix
+ temp(K_inv.nr(), K_inv.nc()) = 1/delta;
+ // put temp into K_inv
+ temp.swap(K_inv);
+
+
+
+ // update K (the kernel matrix)
+ temp.set_size(K.nr()+1, K.nc()+1);
+ set_subm(temp, get_rect(K)) = K;
+ // update the right column of the matrix
+ set_subm(temp, 0, K.nr(),K.nr(),1) = k;
+ // update the bottom row of the matrix
+ set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k);
+ temp(K.nr(), K.nc()) = kx;
+ // put temp into K
+ temp.swap(K);
+
+
+ // now update the alpha vector
+ for (unsigned long i = 0; i < alpha.size(); ++i)
+ {
+ alpha[i] *= cscale;
+ }
+ alpha.push_back(xscale);
+
+
+ if (need_to_update_min_strength)
+ {
+ // now we have to recompute the min_strength in this case
+ recompute_min_strength();
+ }
+ }
+ else
+ {
+ // update the alpha vector so that this new sample has been added into
+ // the mean vector we are accumulating
+ for (unsigned long i = 0; i < alpha.size(); ++i)
+ {
+ alpha[i] = cscale*alpha[i] + xscale*a(i);
+ }
+ }
+ }
+
+ bias_is_stale = true;
+
+ return test_result;
+ }
+
+ void remove_dictionary_vector (
+ long i
+ )
+ /*!
+ requires
+ - 0 <= i < dictionary.size()
+ ensures
+ - #dictionary.size() == dictionary.size() - 1
+ - #alpha.size() == alpha.size() - 1
+ - updates the K_inv matrix so that it is still a proper inverse of the
+ kernel matrix
+ - also removes the necessary row and column from the K matrix
+ - uses the this->a variable so after this function runs that variable
+ will contain a different value.
+ !*/
+ {
+ // remove the dictionary vector
+ dictionary.erase(dictionary.begin()+i);
+
+ // remove the i'th vector from the inverse kernel matrix. This formula is basically
+ // just the reverse of the way K_inv is updated by equation 3.14 during normal training.
+ K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i);
+
+ // now compute the updated alpha values to take account that we just removed one of
+ // our dictionary vectors
+ a = (K_inv*remove_row(K,i)*mat(alpha));
+
+ // now copy over the new alpha values
+ alpha.resize(alpha.size()-1);
+ for (unsigned long k = 0; k < alpha.size(); ++k)
+ {
+ alpha[k] = a(k);
+ }
+
+ // update the K matrix as well
+ K = removerc(K,i,i);
+ }
+
+ void recompute_min_strength (
+ )
+ /*!
+ ensures
+ - recomputes the min_strength and min_vect_idx values
+ so that they are correct with respect to the CONVENTION
+ - uses the this->a variable so after this function runs that variable
+ will contain a different value.
+ !*/
+ {
+ min_strength = std::numeric_limits<scalar_type>::max();
+
+ // here we loop over each dictionary vector and compute what its delta would be if
+ // we were to remove it from the dictionary and then try to add it back in.
+ for (unsigned long i = 0; i < dictionary.size(); ++i)
+ {
+ // compute a = K_inv*k but where dictionary vector i has been removed
+ a = (removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i)) *
+ (remove_row(colm(K,i),i));
+ scalar_type delta = K(i,i) - trans(remove_row(colm(K,i),i))*a;
+
+ if (delta < min_strength)
+ {
+ min_strength = delta;
+ min_vect_idx = i;
+ }
+ }
+ }
+
+
+
+ typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type;
+ typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type;
+ typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type;
+ typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type;
+
+
+ scalar_type min_strength;
+ unsigned long min_vect_idx;
+ bool my_remove_oldest_first;
+
+ kernel_type kernel;
+ dictionary_vector_type dictionary;
+ alpha_vector_type alpha;
+
+ matrix<scalar_type,0,0,mem_manager_type> K_inv;
+ matrix<scalar_type,0,0,mem_manager_type> K;
+
+ scalar_type my_tolerance;
+ unsigned long my_max_dictionary_size;
+ scalar_type samples_seen;
+ mutable scalar_type bias;
+ mutable bool bias_is_stale;
+
+
+ // temp variables here just so we don't have to reconstruct them over and over. Thus,
+ // they aren't really part of the state of this object.
+ matrix<scalar_type,0,1,mem_manager_type> a;
+ matrix<scalar_type,0,1,mem_manager_type> k;
+
+ };
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename kernel_type>
+ void swap(kcentroid<kernel_type>& a, kcentroid<kernel_type>& b)
+ { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_KCENTROId_
+