author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000 |
commit | 58daab21cd043e1dc37024a7f99b396788372918 (patch) | |
tree | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/svm/rvm.h | |
parent | Releasing debian version 1.43.2-1. (diff) | |
download | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip | |
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/svm/rvm.h')
-rw-r--r-- | ml/dlib/dlib/svm/rvm.h | 1018 |
1 file changed, 1018 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/rvm.h b/ml/dlib/dlib/svm/rvm.h
new file mode 100644
index 000000000..e7ad495a2
--- /dev/null
+++ b/ml/dlib/dlib/svm/rvm.h
@@ -0,0 +1,1018 @@
+// Copyright (C) 2008  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_RVm_
+#define DLIB_RVm_
+
+#include "rvm_abstract.h"
+#include <cmath>
+#include <limits>
+#include "../matrix.h"
+#include "../algs.h"
+#include "function.h"
+#include "kernel.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    namespace rvm_helpers
+    {
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename scalar_vector_type, typename mem_manager_type>
+        long find_next_best_alpha_to_update (
+            const scalar_vector_type& S,
+            const scalar_vector_type& Q,
+            const scalar_vector_type& alpha,
+            const matrix<long,0,1,mem_manager_type>& active_bases,
+            const bool search_all_alphas,
+            typename scalar_vector_type::type eps
+        )
+        /*!
+            ensures
+                - if (we can find another alpha to update) then
+                    - returns the index of said alpha
+                - else
+                    - returns -1
+        !*/
+        {
+            typedef typename scalar_vector_type::type scalar_type;
+            // now use S and Q to find the next alpha to update.  What
+            // we want to do here is select the alpha to update that gives us
+            // the greatest improvement in marginal likelihood.
+            long selected_idx = -1;
+            scalar_type greatest_improvement = -1;
+            for (long i = 0; i < S.nr(); ++i)
+            {
+                scalar_type value = -1;
+
+                // if i is currently in the active set
+                if (active_bases(i) >= 0)
+                {
+                    const long idx = active_bases(i);
+                    const scalar_type s = alpha(idx)*S(i)/(alpha(idx) - S(i));
+                    const scalar_type q = alpha(idx)*Q(i)/(alpha(idx) - S(i));
+
+                    if (q*q-s > 0)
+                    {
+                        // only update an existing alpha if this is a narrow search
+                        if (search_all_alphas == false)
+                        {
+                            // choosing this sample would mean doing an update of an
+                            // existing alpha value.
+                            scalar_type new_alpha = s*s/(q*q-s);
+                            scalar_type cur_alpha = alpha(idx);
+                            new_alpha = 1/new_alpha;
+                            cur_alpha = 1/cur_alpha;
+
+                            // from equation 32 in the Tipping paper
+                            value = Q(i)*Q(i)/(S(i) + 1/(new_alpha - cur_alpha) ) -
+                                    std::log(1 + S(i)*(new_alpha - cur_alpha));
+                        }
+
+                    }
+                    // we only pick an alpha to remove if this is a wide search and it
+                    // wasn't one of the recently added ones
+                    else if (search_all_alphas && idx+2 < alpha.size() )
+                    {
+                        // choosing this sample would mean the alpha value is infinite
+                        // so we would remove the selected sample from our model.
+
+                        // from equation 37 in the Tipping paper
+                        value = Q(i)*Q(i)/(S(i) - alpha(idx)) -
+                                std::log(1-S(i)/alpha(idx));
+
+                    }
+                }
+                else if (search_all_alphas)
+                {
+                    const scalar_type s = S(i);
+                    const scalar_type q = Q(i);
+
+                    if (q*q-s > 0)
+                    {
+                        // choosing this sample would mean we would add the selected
+                        // sample to our model.
+
+                        // from equation 27 in the Tipping paper
+                        value = (Q(i)*Q(i)-S(i))/S(i) + std::log(S(i)/(Q(i)*Q(i)));
+                    }
+                }
+
+                if (value > greatest_improvement)
+                {
+                    greatest_improvement = value;
+                    selected_idx = i;
+                }
+            }
+
+            // If the greatest_improvement in marginal likelihood we would get is less
+            // than our epsilon then report that there isn't anything else to do.  But
+            // if it is big enough then return the selected_idx.
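+            // For reference, the "value" quantities scored above are, up to a
+            // constant factor, the changes in log marginal likelihood given in the
+            // Tipping paper for each kind of update (a = current alpha,
+            // a' = s*s/(q*q-s) = the re-estimated alpha):
+            //   re-estimate (eq. 32):  Q^2/(S + 1/(1/a' - 1/a)) - log(1 + S*(1/a' - 1/a))
+            //   delete      (eq. 37):  Q^2/(S - a) - log(1 - S/a)
+            //   add         (eq. 27):  (Q^2 - S)/S + log(S/Q^2)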
+            if (greatest_improvement > eps)
+                return selected_idx;
+            else
+                return -1;
+        }
+
+    } // end namespace rvm_helpers
+
+    // ------------------------------------------------------------------------------------
+
+
+    template <
+        typename kern_type
+        >
+    class rvm_trainer
+    {
+        /*!
+            This is an implementation of the binary classifier version of the
+            relevance vector machine algorithm described in the paper:
+                Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation
+                for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings
+                of the Ninth International Workshop on Artificial Intelligence and Statistics,
+                Key West, FL, Jan 3-6.
+
+            This code mostly does what is described in the above paper with the exception
+            that here we use a different stopping condition as well as a modified alpha
+            selection rule.  See the code for the exact details.
+        !*/
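+        /*
+            A minimal usage sketch (illustrative only; it assumes dlib's
+            radial_basis_kernel and a std::vector of +1/-1 labels):
+
+                typedef matrix<double,2,1> sample_type;
+                typedef radial_basis_kernel<sample_type> kernel_type;
+
+                std::vector<sample_type> samples;
+                std::vector<double> labels;  // each entry is +1 or -1
+                // ... fill samples and labels ...
+
+                rvm_trainer<kernel_type> trainer;
+                trainer.set_kernel(kernel_type(0.1));  // 0.1 is an assumed gamma
+                decision_function<kernel_type> df = trainer.train(samples, labels);
+                double score = df(samples[0]);  // score > 0 predicts the +1 class
+        */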
+    public:
+        typedef kern_type kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        rvm_trainer (
+        ) : eps(0.001), max_iterations(2000)
+        {
+        }
+
+        void set_max_iterations (
+            unsigned long max_iterations_
+        )
+        {
+            max_iterations = max_iterations_;
+        }
+
+        unsigned long get_max_iterations (
+        ) const
+        {
+            return max_iterations;
+        }
+
+        void set_epsilon (
+            scalar_type eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\tvoid rvm_trainer::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps: " << eps_
+                );
+            eps = eps_;
+        }
+
+        const scalar_type get_epsilon (
+        ) const
+        {
+            return eps;
+        }
+
+        void set_kernel (
+            const kernel_type& k
+        )
+        {
+            kernel = k;
+        }
+
+        const kernel_type& get_kernel (
+        ) const
+        {
+            return kernel;
+        }
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        {
+            return do_train(mat(x), mat(y));
+        }
+
+        void swap (
+            rvm_trainer& item
+        )
+        {
+            exchange(kernel, item.kernel);
+            exchange(eps, item.eps);
+            // also exchange max_iterations so both trainers keep consistent settings
+            exchange(max_iterations, item.max_iterations);
+        }
+
+    private:
+
+    // ------------------------------------------------------------------------------------
+
+        typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type;
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> do_train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        {
+
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
+                "\tdecision_function rvm_trainer::train(x,y)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t x.nr(): " << x.nr()
+                << "\n\t y.nr(): " << y.nr()
+                << "\n\t x.nc(): " << x.nc()
+                << "\n\t y.nc(): " << y.nc()
+                << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
+                );
+
+            // make a target vector where +1 examples have value 1 and -1 examples
+            // have a value of 0.
+            scalar_vector_type t(y.size());
+            for (long i = 0; i < y.size(); ++i)
+            {
+                if (y(i) == 1)
+                    t(i) = 1;
+                else
+                    t(i) = 0;
+            }
+
+            /*!
+                This is the convention for the active_bases variable in this function:
+                    - if (active_bases(i) >= 0) then
+                        - alpha(active_bases(i)) == the alpha value associated with sample x(i)
+                        - weights(active_bases(i)) == the weight value associated with sample x(i)
+                        - colm(phi, active_bases(i)) == the column of phi associated with sample x(i)
+                        - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum())
+                    - else
+                        - the i'th sample isn't in the model and notionally has an alpha of infinity
+                          and a weight of 0.
+            !*/
+            matrix<long,0,1,mem_manager_type> active_bases(x.nr());
+            scalar_matrix_type phi(x.nr(),1);
+            scalar_vector_type alpha(1), prev_alpha;
+            scalar_vector_type weights(1), prev_weights;
+
+            scalar_vector_type tempv, K_col;
+
+            // set the initial values of these guys
+            set_all_elements(active_bases, -1);
+            long first_basis = pick_initial_vector(x,t);
+            get_kernel_colum(first_basis, x, K_col);
+            active_bases(first_basis) = 0;
+            set_colm(phi,0) = K_col;
+            alpha(0) = compute_initial_alpha(phi, t);
+            weights(0) = 1;
+
+
+            // now declare a bunch of other variables we will be using below
+            scalar_vector_type mu, t_hat, Q, S;
+            scalar_matrix_type sigma;
+
+            matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3;
+            scalar_matrix_type tempm;
+
+            scalar_vector_type t_estimate;
+            scalar_vector_type beta;
+
+
+            Q.set_size(x.nr());
+            S.set_size(x.nr());
+
+            bool recompute_beta = true;
+
+            bool search_all_alphas = false;
+            unsigned long ticker = 0;
+            const unsigned long rounds_of_narrow_search = 100;
+            unsigned long iterations = 0;
+
+            while (iterations != max_iterations)
+            {
+                iterations++;
+                if (recompute_beta)
+                {
+                    // calculate the current t_estimate.  (This is the predicted t value
+                    // for each sample according to the current state of the classifier.)
+                    t_estimate = phi*weights;
+
+                    // calculate the current beta
+                    beta = sigmoid(t_estimate);
+                    beta = pointwise_multiply(beta,(uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta));
+                    recompute_beta = false;
+                }
+
+                // Compute optimal weights and sigma for current alpha using IRLS.  This is the same
+                // technique documented in the paper by equations 12-14.
+                scalar_type weight_delta = std::numeric_limits<scalar_type>::max();
+                int count = 0;
+                while (weight_delta > 0.0001)
+                {
+                    // This is a sanity check to make sure we never get stuck in this
+                    // loop due to some degenerate numerical condition.
+                    ++count;
+                    if (count > 100)
+                    {
+                        // jump us to where search_all_alphas will be set to true
+                        ticker = rounds_of_narrow_search;
+                        break;
+                    }
+
+                    // compute the updated sigma matrix
+                    sigma = scale_columns(trans(phi),beta)*phi;
+                    for (long r = 0; r < alpha.nr(); ++r)
+                        sigma(r,r) += alpha(r);
+                    sigma = inv(sigma);
+
+
+                    // compute the updated weights vector (t_hat = phi*mu_mp + inv(B)*(t-y))
+                    t_hat = t_estimate + trans(scale_columns(trans(t-sigmoid(t_estimate)),reciprocal(beta)));
+
+                    // mu = sigma*trans(phi)*b*t_hat
+                    mu = sigma*tmp(trans(phi)* trans(scale_columns(trans(t_hat), beta)));
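+                    // In matrix form, the IRLS step just performed is (equations 12-14
+                    // of the Tipping paper, with B = diag(beta) and A = diag(alpha)):
+                    //   sigma = inv(trans(phi)*B*phi + A)
+                    //   t_hat = phi*w_old + inv(B)*(t - sigmoid(phi*w_old))
+                    //   mu    = sigma*trans(phi)*B*t_hat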
+
+                    // now compute how much the weights vector changed during this
+                    // iteration through this loop.
+                    weight_delta = max(abs(mu-weights));
+
+                    // put mu into the weights vector
+                    mu.swap(weights);
+
+                    // calculate the current t_estimate
+                    t_estimate = phi*weights;
+
+                    // calculate the current beta
+                    beta = sigmoid(t_estimate);
+                    beta = pointwise_multiply(beta, uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta);
+
+                }
+
+                // check if we should do a full search for the best alpha to optimize
+                if (ticker >= rounds_of_narrow_search)
+                {
+                    // if the current alpha and weights are equal to what they were
+                    // at the last time we were about to start a wide search then
+                    // we are done.
+                    if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps))
+                        break;
+
+
+                    prev_alpha = alpha;
+                    prev_weights = weights;
+                    search_all_alphas = true;
+                    ticker = 0;
+                }
+                else
+                {
+                    search_all_alphas = false;
+                }
+                ++ticker;
+
+                // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t_hat)
+                tempv = phi*tmp(sigma*tmp(trans(phi)*trans(scale_columns(trans(t_hat),beta))));
+                for (long i = 0; i < S.size(); ++i)
+                {
+                    // if we are currently limiting the search for the next alpha to update
+                    // to the set in the active set then skip a non-active vector.
+                    if (search_all_alphas == false && active_bases(i) == -1)
+                        continue;
+
+                    // Get the column for this sample out of the kernel matrix.  If it is
+                    // something in the active set then just get it right out of phi, otherwise
+                    // we have to calculate it.
+                    if (active_bases(i) != -1)
+                        K_col = colm(phi,active_bases(i));
+                    else
+                        get_kernel_colum(i, x, K_col);
+
+                    // tempv2 = trans(phi_m)*B
+                    tempv2 = scale_columns(trans(K_col), beta);
+                    tempv3 = tempv2*phi;
+                    S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3);
+                    Q(i) = tempv2*t_hat - tempv2*tempv;
+                }
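+                // The values just computed correspond to equations 24 and 25 of the
+                // Tipping paper: for a kernel column phi_m,
+                //   S(m) = trans(phi_m)*B*phi_m - trans(phi_m)*B*phi*sigma*trans(phi)*B*phi_m
+                //   Q(m) = trans(phi_m)*B*t_hat - trans(phi_m)*B*phi*sigma*trans(phi)*B*t_hat
+                // S(m) is the "sparsity factor" and Q(m) the "quality factor" of basis m.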
+
+                const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps);
+
+
+                // if find_next_best_alpha_to_update didn't find any good alpha to update
+                if (selected_idx == -1)
+                {
+                    if (search_all_alphas == false)
+                    {
+                        // jump us to where search_all_alphas will be set to true and try again
+                        ticker = rounds_of_narrow_search;
+                        continue;
+                    }
+                    else
+                    {
+                        // we are really done so quit the main loop
+                        break;
+                    }
+                }
+
+
+                // next we update the selected alpha.
+
+                // if the selected alpha is in the active set
+                if (active_bases(selected_idx) >= 0)
+                {
+                    const long idx = active_bases(selected_idx);
+                    const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx));
+                    const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx));
+
+                    if (q*q-s > 0)
+                    {
+                        // reestimate the value of alpha
+                        alpha(idx) = s*s/(q*q-s);
+
+                    }
+                    else
+                    {
+                        // the new alpha value is infinite so remove the selected alpha from our model
+                        active_bases(selected_idx) = -1;
+                        phi = remove_col(phi, idx);
+                        weights = remove_row(weights, idx);
+                        alpha = remove_row(alpha, idx);
+
+                        // fix the index values in active_bases
+                        for (long i = 0; i < active_bases.size(); ++i)
+                        {
+                            if (active_bases(i) > idx)
+                            {
+                                active_bases(i) -= 1;
+                            }
+                        }
+
+                        // we changed the number of weights so we need to remember to
+                        // recompute the beta vector next time around the main loop.
+                        recompute_beta = true;
+                    }
+                }
+                else
+                {
+                    const scalar_type s = S(selected_idx);
+                    const scalar_type q = Q(selected_idx);
+
+                    if (q*q-s > 0)
+                    {
+                        // add the selected alpha to our model
+
+                        active_bases(selected_idx) = phi.nc();
+
+                        // update alpha
+                        tempv.set_size(alpha.size()+1);
+                        set_subm(tempv, get_rect(alpha)) = alpha;
+                        tempv(phi.nc()) = s*s/(q*q-s);
+                        tempv.swap(alpha);
+
+                        // update weights
+                        tempv.set_size(weights.size()+1);
+                        set_subm(tempv, get_rect(weights)) = weights;
+                        tempv(phi.nc()) = 0;
+                        tempv.swap(weights);
+
+                        // update phi by adding the new sample's kernel matrix column in as one of phi's columns
+                        tempm.set_size(phi.nr(), phi.nc()+1);
+                        set_subm(tempm, get_rect(phi)) = phi;
+                        get_kernel_colum(selected_idx, x, K_col);
+                        set_colm(tempm, phi.nc()) = K_col;
+                        tempm.swap(phi);
+
+
+                        // we changed the number of weights so we need to remember to
+                        // recompute the beta vector next time around the main loop.
+                        recompute_beta = true;
+                    }
+                }
+
+            } // end of the main loop.  So we have converged on the final answer.
+
+
+            // now put everything into a decision_function object and return it
+            std_vector_c<sample_type> dictionary;
+            std_vector_c<scalar_type> final_weights;
+            for (long i = 0; i < active_bases.size(); ++i)
+            {
+                if (active_bases(i) >= 0)
+                {
+                    dictionary.push_back(x(i));
+                    final_weights.push_back(weights(active_bases(i)));
+                }
+            }
+
+            return decision_function<kernel_type> ( mat(final_weights),
+                                                    -sum(mat(final_weights))*tau,
+                                                    kernel,
+                                                    mat(dictionary));
+
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename M1, typename M2>
+        long pick_initial_vector (
+            const M1& x,
+            const M2& t
+        ) const
+        {
+            scalar_vector_type K_col;
+            double max_projection = -std::numeric_limits<scalar_type>::infinity();
+            long max_idx = 0;
+            // find the row in the kernel matrix that has the biggest normalized projection onto the t vector
+            for (long r = 0; r < x.nr(); ++r)
+            {
+                get_kernel_colum(r,x,K_col);
+                double temp = trans(K_col)*t;
+                temp = temp*temp/length_squared(K_col);
+
+                if (temp > max_projection)
+                {
+                    max_projection = temp;
+                    max_idx = r;
+                }
+            }
+
+            return max_idx;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename T>
+        void get_kernel_colum (
+            long idx,
+            const T& x,
+            scalar_vector_type& col
+        ) const
+        {
+            col.set_size(x.nr());
+            for (long i = 0; i < col.size(); ++i)
+            {
+                col(i) = kernel(x(idx), x(i)) + tau;
+            }
+        }
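+        // Note on tau: get_kernel_colum() adds the constant tau to every kernel value,
+        // which gives the model an implicit bias term.  The decision_function returned
+        // by do_train() sets its b term to -sum(weights)*tau so that its output,
+        // sum_i w_i*k(x,x_i) - b, equals sum_i w_i*(k(x,x_i) + tau), i.e. the same
+        // quantity the model was trained with.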
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename M1, typename M2>
+        scalar_type compute_initial_alpha (
+            const M1& phi,
+            const M2& t
+        ) const
+        {
+            const double temp = length_squared(phi);
+            const double temp2 = trans(phi)*t;
+
+            return temp/( temp2*temp2/temp + variance(t)*0.1);
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        // private member variables
+        kernel_type kernel;
+        scalar_type eps;
+        unsigned long max_iterations;
+
+        const static scalar_type tau;
+
+    }; // end of class rvm_trainer
+
+    template <typename kernel_type>
+    const typename kernel_type::scalar_type rvm_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001);
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename K>
+    void swap (
+        rvm_trainer<K>& a,
+        rvm_trainer<K>& b
+    ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename kern_type
+        >
+    class rvm_regression_trainer
+    {
+        /*!
+            This is an implementation of the regression version of the
+            relevance vector machine algorithm described in the paper:
+                Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation
+                for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings
+                of the Ninth International Workshop on Artificial Intelligence and Statistics,
+                Key West, FL, Jan 3-6.
+
+            This code mostly does what is described in the above paper with the exception
+            that here we use a different stopping condition as well as a modified alpha
+            selection rule.  See the code for the exact details.
+        !*/
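+        /*
+            A minimal usage sketch (illustrative only; it assumes dlib's
+            radial_basis_kernel and real-valued targets):
+
+                typedef matrix<double,1,1> sample_type;
+                typedef radial_basis_kernel<sample_type> kernel_type;
+
+                std::vector<sample_type> samples;
+                std::vector<double> targets;
+                // ... fill samples and targets ...
+
+                rvm_regression_trainer<kernel_type> trainer;
+                trainer.set_kernel(kernel_type(0.1));  // 0.1 is an assumed gamma
+                decision_function<kernel_type> df = trainer.train(samples, targets);
+                double prediction = df(samples[0]);
+        */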
+    public:
+        typedef kern_type kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        rvm_regression_trainer (
+        ) : eps(0.001)
+        {
+        }
+
+        void set_epsilon (
+            scalar_type eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\tvoid rvm_regression_trainer::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps: " << eps_
+                );
+            eps = eps_;
+        }
+
+        const scalar_type get_epsilon (
+        ) const
+        {
+            return eps;
+        }
+
+        void set_kernel (
+            const kernel_type& k
+        )
+        {
+            kernel = k;
+        }
+
+        const kernel_type& get_kernel (
+        ) const
+        {
+            return kernel;
+        }
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& t
+        ) const
+        {
+            return do_train(mat(x), mat(t));
+        }
+
+        void swap (
+            rvm_regression_trainer& item
+        )
+        {
+            exchange(kernel, item.kernel);
+            exchange(eps, item.eps);
+        }
+
+    private:
+
+    // ------------------------------------------------------------------------------------
+
+        typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
+        typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type;
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> do_train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& t
+        ) const
+        {
+
+            // make sure requires clause is not broken
+            DLIB_ASSERT(is_learning_problem(x,t) && x.size() > 0,
+                "\tdecision_function rvm_regression_trainer::train(x,t)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t x.nr(): " << x.nr()
+                << "\n\t t.nr(): " << t.nr()
+                << "\n\t x.nc(): " << x.nc()
+                << "\n\t t.nc(): " << t.nc()
+                );
+
+
+            /*!
+                This is the convention for the active_bases variable in this function:
+                    - if (active_bases(i) >= 0) then
+                        - alpha(active_bases(i)) == the alpha value associated with sample x(i)
+                        - weights(active_bases(i)) == the weight value associated with sample x(i)
+                        - colm(phi, active_bases(i)) == the column of phi associated with sample x(i)
+                        - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum())
+                    - else
+                        - the i'th sample isn't in the model and notionally has an alpha of infinity
+                          and a weight of 0.
+            !*/
+            matrix<long,0,1,mem_manager_type> active_bases(x.nr());
+            scalar_matrix_type phi(x.nr(),1);
+            scalar_vector_type alpha(1), prev_alpha;
+            scalar_vector_type weights(1), prev_weights;
+
+            scalar_vector_type tempv, K_col;
+            scalar_type var = variance(t)*0.1;
+
+            // set the initial values of these guys
+            set_all_elements(active_bases, -1);
+            long first_basis = pick_initial_vector(x,t);
+            get_kernel_colum(first_basis, x, K_col);
+            active_bases(first_basis) = 0;
+            set_colm(phi,0) = K_col;
+            alpha(0) = compute_initial_alpha(phi, t, var);
+            weights(0) = 1;
+
+
+            // now declare a bunch of other variables we will be using below
+            scalar_vector_type Q, S;
+            scalar_matrix_type sigma;
+
+            matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3;
+            scalar_matrix_type tempm;
+
+
+            Q.set_size(x.nr());
+            S.set_size(x.nr());
+
+
+            bool search_all_alphas = false;
+            unsigned long ticker = 0;
+            const unsigned long rounds_of_narrow_search = 100;
+
+            while (true)
+            {
+                // Compute optimal weights and sigma for current alpha using equation 6.
+                sigma = trans(phi)*phi/var;
+                for (long r = 0; r < alpha.nr(); ++r)
+                    sigma(r,r) += alpha(r);
+                sigma = inv(sigma);
+                weights = sigma*trans(phi)*t/var;
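+                // With A = diag(alpha) and noise variance var, this is the Gaussian
+                // posterior over the weights from equation 6 of the Tipping paper:
+                //   sigma   = inv(A + trans(phi)*phi/var)
+                //   weights = sigma*trans(phi)*t/var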
+
+
+
+                // check if we should do a full search for the best alpha to optimize
+                if (ticker == rounds_of_narrow_search)
+                {
+                    // if the current alpha and weights are equal to what they were
+                    // at the last time we were about to start a wide search then
+                    // we are done.
+                    if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps))
+                        break;
+
+                    prev_alpha = alpha;
+                    prev_weights = weights;
+                    search_all_alphas = true;
+                    ticker = 0;
+                }
+                else
+                {
+                    search_all_alphas = false;
+                }
+                ++ticker;
+
+                // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t)
+                tempv = phi*tmp(sigma*tmp(trans(phi)*t/var));
+                for (long i = 0; i < S.size(); ++i)
+                {
+                    // if we are currently limiting the search for the next alpha to update
+                    // to the set in the active set then skip a non-active vector.
+                    if (search_all_alphas == false && active_bases(i) == -1)
+                        continue;
+
+                    // Get the column for this sample out of the kernel matrix.  If it is
+                    // something in the active set then just get it right out of phi, otherwise
+                    // we have to calculate it.
+                    if (active_bases(i) != -1)
+                        K_col = colm(phi,active_bases(i));
+                    else
+                        get_kernel_colum(i, x, K_col);
+
+                    // tempv2 = trans(phi_m)*B
+                    tempv2 = trans(K_col)/var;
+                    tempv3 = tempv2*phi;
+                    S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3);
+                    Q(i) = tempv2*t - tempv2*tempv;
+                }
+
+                const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps);
+
+                // if find_next_best_alpha_to_update didn't find any good alpha to update
+                if (selected_idx == -1)
+                {
+                    if (search_all_alphas == false)
+                    {
+                        // jump us to where search_all_alphas will be set to true and try again
+                        ticker = rounds_of_narrow_search;
+                        continue;
+                    }
+                    else
+                    {
+                        // we are really done so quit the main loop
+                        break;
+                    }
+                }
+
+                // recompute the variance
+                var = length_squared(t - phi*weights)/(x.nr() - weights.size() + trans(alpha)*diag(sigma));
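+                // This is the usual RVM noise variance re-estimate.  With
+                // gamma_i = 1 - alpha(i)*sigma(i,i) measuring how well determined
+                // weight i is, it reads
+                //   var = length_squared(t - phi*weights)/(N - sum(gamma))
+                // and N - sum(gamma) equals the denominator above because
+                // sum(gamma) = weights.size() - trans(alpha)*diag(sigma).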
+
+                // next we update the selected alpha.
+
+                // if the selected alpha is in the active set
+                if (active_bases(selected_idx) >= 0)
+                {
+                    const long idx = active_bases(selected_idx);
+                    const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx));
+                    const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx));
+
+                    if (q*q-s > 0)
+                    {
+                        // reestimate the value of alpha
+                        alpha(idx) = s*s/(q*q-s);
+
+                    }
+                    else
+                    {
+                        // the new alpha value is infinite so remove the selected alpha from our model
+                        active_bases(selected_idx) = -1;
+                        phi = remove_col(phi, idx);
+                        weights = remove_row(weights, idx);
+                        alpha = remove_row(alpha, idx);
+
+                        // fix the index values in active_bases
+                        for (long i = 0; i < active_bases.size(); ++i)
+                        {
+                            if (active_bases(i) > idx)
+                            {
+                                active_bases(i) -= 1;
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    const scalar_type s = S(selected_idx);
+                    const scalar_type q = Q(selected_idx);
+
+                    if (q*q-s > 0)
+                    {
+                        // add the selected alpha to our model
+
+                        active_bases(selected_idx) = phi.nc();
+
+                        // update alpha
+                        tempv.set_size(alpha.size()+1);
+                        set_subm(tempv, get_rect(alpha)) = alpha;
+                        tempv(phi.nc()) = s*s/(q*q-s);
+                        tempv.swap(alpha);
+
+                        // update weights
+                        tempv.set_size(weights.size()+1);
+                        set_subm(tempv, get_rect(weights)) = weights;
+                        tempv(phi.nc()) = 0;
+                        tempv.swap(weights);
+
+                        // update phi by adding the new sample's kernel matrix column in as one of phi's columns
+                        tempm.set_size(phi.nr(), phi.nc()+1);
+                        set_subm(tempm, get_rect(phi)) = phi;
+                        get_kernel_colum(selected_idx, x, K_col);
+                        set_colm(tempm, phi.nc()) = K_col;
+                        tempm.swap(phi);
+
+                    }
+                }
+
+
+
+            } // end while(true).  So we have converged on the final answer.
+
+
+            // now put everything into a decision_function object and return it
+            std_vector_c<sample_type> dictionary;
+            std_vector_c<scalar_type> final_weights;
+            for (long i = 0; i < active_bases.size(); ++i)
+            {
+                if (active_bases(i) >= 0)
+                {
+                    dictionary.push_back(x(i));
+                    final_weights.push_back(weights(active_bases(i)));
+                }
+            }
+
+            return decision_function<kernel_type> ( mat(final_weights),
+                                                    -sum(mat(final_weights))*tau,
+                                                    kernel,
+                                                    mat(dictionary));
+
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename T>
+        void get_kernel_colum (
+            long idx,
+            const T& x,
+            scalar_vector_type& col
+        ) const
+        {
+            col.set_size(x.nr());
+            for (long i = 0; i < col.size(); ++i)
+            {
+                col(i) = kernel(x(idx), x(i)) + tau;
+            }
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename M1, typename M2>
+        scalar_type compute_initial_alpha (
+            const M1& phi,
+            const M2& t,
+            const scalar_type& var
+        ) const
+        {
+            const double temp = length_squared(phi);
+            const double temp2 = trans(phi)*t;
+
+            return temp/( temp2*temp2/temp + var);
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        template <typename M1, typename M2>
+        long pick_initial_vector (
+            const M1& x,
+            const M2& t
+        ) const
+        {
+            scalar_vector_type K_col;
+            double max_projection = -std::numeric_limits<scalar_type>::infinity();
+            long max_idx = 0;
+            // find the row in the kernel matrix that has the biggest normalized projection onto the t vector
+            for (long r = 0; r < x.nr(); ++r)
+            {
+                get_kernel_colum(r,x,K_col);
+                double temp = trans(K_col)*t;
+                temp = temp*temp/length_squared(K_col);
+
+                if (temp > max_projection)
+                {
+                    max_projection = temp;
+                    max_idx = r;
+                }
+            }
+
+            return max_idx;
+        }
+
+    // ------------------------------------------------------------------------------------
+
+        // private member variables
+        kernel_type kernel;
+        scalar_type eps;
+
+        const static scalar_type tau;
+
+    }; // end of class rvm_regression_trainer
+
+    template <typename kernel_type>
+    const typename kernel_type::scalar_type rvm_regression_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001);
+
+// ----------------------------------------------------------------------------------------
+
+    template <typename K>
+    void swap (
+        rvm_regression_trainer<K>& a,
+        rvm_regression_trainer<K>& b
+    ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RVm_
+