author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-03-09 13:19:48 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-03-09 13:20:02 +0000
commit     58daab21cd043e1dc37024a7f99b396788372918 (patch)
tree       96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /ml/dlib/dlib/svm/rvm.h
parent     Releasing debian version 1.43.2-1. (diff)
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/dlib/dlib/svm/rvm.h')
-rw-r--r--  ml/dlib/dlib/svm/rvm.h | 1018
1 file changed, 1018 insertions(+), 0 deletions(-)
diff --git a/ml/dlib/dlib/svm/rvm.h b/ml/dlib/dlib/svm/rvm.h
new file mode 100644
index 000000000..e7ad495a2
--- /dev/null
+++ b/ml/dlib/dlib/svm/rvm.h
@@ -0,0 +1,1018 @@
+// Copyright (C) 2008 Davis E. King (davis@dlib.net)
+// License: Boost Software License.  See LICENSE.txt for the full license.
+#ifndef DLIB_RVm_
+#define DLIB_RVm_
+
+#include "rvm_abstract.h"
+#include <cmath>
+#include <limits>
+#include "../matrix.h"
+#include "../algs.h"
+#include "function.h"
+#include "kernel.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ namespace rvm_helpers
+ {
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename scalar_vector_type, typename mem_manager_type>
+ long find_next_best_alpha_to_update (
+ const scalar_vector_type& S,
+ const scalar_vector_type& Q,
+ const scalar_vector_type& alpha,
+ const matrix<long,0,1,mem_manager_type>& active_bases,
+ const bool search_all_alphas,
+ typename scalar_vector_type::type eps
+ )
+ /*!
+ ensures
+ - if (we can find another alpha to update) then
+ - returns the index of said alpha
+ - else
+ - returns -1
+ !*/
+ {
+ typedef typename scalar_vector_type::type scalar_type;
+                // Now use S and Q to find the next alpha to update.  We want to
+                // select the alpha whose update gives the greatest improvement in
+                // the marginal likelihood.
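+                //
+                // In the notation of the Tipping paper, each candidate basis i has
+                // the "sparsity" and "quality" factors
+                //     s = alpha(i)*S(i)/(alpha(i) - S(i))
+                //     q = alpha(i)*Q(i)/(alpha(i) - S(i))
+                // (with s = S(i) and q = Q(i) for bases not in the model), and a
+                // basis belongs in the model only when q*q - s > 0.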
+ long selected_idx = -1;
+ scalar_type greatest_improvement = -1;
+ for (long i = 0; i < S.nr(); ++i)
+ {
+ scalar_type value = -1;
+
+ // if i is currently in the active set
+ if (active_bases(i) >= 0)
+ {
+ const long idx = active_bases(i);
+ const scalar_type s = alpha(idx)*S(i)/(alpha(idx) - S(i));
+ const scalar_type q = alpha(idx)*Q(i)/(alpha(idx) - S(i));
+
+ if (q*q-s > 0)
+ {
+ // only update an existing alpha if this is a narrow search
+ if (search_all_alphas == false)
+ {
+ // choosing this sample would mean doing an update of an
+ // existing alpha value.
+ scalar_type new_alpha = s*s/(q*q-s);
+ scalar_type cur_alpha = alpha(idx);
+ new_alpha = 1/new_alpha;
+ cur_alpha = 1/cur_alpha;
+
+ // from equation 32 in the Tipping paper
+ value = Q(i)*Q(i)/(S(i) + 1/(new_alpha - cur_alpha) ) -
+ std::log(1 + S(i)*(new_alpha - cur_alpha));
+ }
+
+ }
+ // we only pick an alpha to remove if this is a wide search and it wasn't one of the recently added ones
+ else if (search_all_alphas && idx+2 < alpha.size() )
+ {
+ // choosing this sample would mean the alpha value is infinite
+ // so we would remove the selected sample from our model.
+
+ // from equation 37 in the Tipping paper
+ value = Q(i)*Q(i)/(S(i) - alpha(idx)) -
+ std::log(1-S(i)/alpha(idx));
+
+ }
+ }
+ else if (search_all_alphas)
+ {
+ const scalar_type s = S(i);
+ const scalar_type q = Q(i);
+
+ if (q*q-s > 0)
+ {
+ // choosing this sample would mean we would add the selected
+ // sample to our model.
+
+ // from equation 27 in the Tipping paper
+ value = (Q(i)*Q(i)-S(i))/S(i) + std::log(S(i)/(Q(i)*Q(i)));
+ }
+ }
+
+ if (value > greatest_improvement)
+ {
+ greatest_improvement = value;
+ selected_idx = i;
+ }
+ }
+
+ // If the greatest_improvement in marginal likelihood we would get is less
+ // than our epsilon then report that there isn't anything else to do. But
+ // if it is big enough then return the selected_idx.
+ if (greatest_improvement > eps)
+ return selected_idx;
+ else
+ return -1;
+ }
+
+ } // end namespace rvm_helpers
+
+ // ------------------------------------------------------------------------------------
+
+
+ template <
+ typename kern_type
+ >
+ class rvm_trainer
+ {
+ /*!
+ This is an implementation of the binary classifier version of the
+ relevance vector machine algorithm described in the paper:
+ Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation
+ for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings
+ of the Ninth International Workshop on Artificial Intelligence and Statistics,
+ Key West, FL, Jan 3-6.
+
+ This code mostly does what is described in the above paper with the exception
+ that here we use a different stopping condition as well as a modified alpha
+ selection rule. See the code for the exact details.
+ !*/
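+
+        /*
+            A minimal usage sketch (the sample type, kernel, and gamma value here
+            are arbitrary illustrative choices; see rvm_abstract.h for the formal
+            interface specification):
+
+                typedef matrix<double,2,1> sample_type;
+                typedef radial_basis_kernel<sample_type> kernel_type;
+
+                rvm_trainer<kernel_type> trainer;
+                trainer.set_kernel(kernel_type(0.1));
+
+                std::vector<sample_type> samples;
+                std::vector<double> labels;  // each label must be +1 or -1
+                // ... fill samples and labels ...
+
+                decision_function<kernel_type> df = trainer.train(samples, labels);
+                double out = df(samples[0]);  // out > 0 means predicted class +1
+        */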
+
+ public:
+ typedef kern_type kernel_type;
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+ typedef decision_function<kernel_type> trained_function_type;
+
+ rvm_trainer (
+ ) : eps(0.001), max_iterations(2000)
+ {
+ }
+
+ void set_max_iterations (
+ unsigned long max_iterations_
+ )
+ {
+ max_iterations = max_iterations_;
+ }
+
+ unsigned long get_max_iterations (
+ ) const
+ {
+ return max_iterations;
+ }
+
+ void set_epsilon (
+ scalar_type eps_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(eps_ > 0,
+ "\tvoid rvm_trainer::set_epsilon(eps_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t eps: " << eps_
+ );
+ eps = eps_;
+ }
+
+ const scalar_type get_epsilon (
+ ) const
+ {
+ return eps;
+ }
+
+ void set_kernel (
+ const kernel_type& k
+ )
+ {
+ kernel = k;
+ }
+
+ const kernel_type& get_kernel (
+ ) const
+ {
+ return kernel;
+ }
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+ return do_train(mat(x), mat(y));
+ }
+
+        void swap (
+            rvm_trainer& item
+        )
+        {
+            exchange(kernel, item.kernel);
+            exchange(eps, item.eps);
+            exchange(max_iterations, item.max_iterations);
+        }
+
+ private:
+
+ // ------------------------------------------------------------------------------------
+
+ typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
+ typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type;
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> do_train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
+ "\tdecision_function rvm_trainer::train(x,y)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t x.nr(): " << x.nr()
+ << "\n\t y.nr(): " << y.nr()
+ << "\n\t x.nc(): " << x.nc()
+ << "\n\t y.nc(): " << y.nc()
+ << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false")
+ );
+
+ // make a target vector where +1 examples have value 1 and -1 examples
+ // have a value of 0.
+ scalar_vector_type t(y.size());
+ for (long i = 0; i < y.size(); ++i)
+ {
+ if (y(i) == 1)
+ t(i) = 1;
+ else
+ t(i) = 0;
+ }
+
+ /*! This is the convention for the active_bases variable in the function:
+ - if (active_bases(i) >= 0) then
+ - alpha(active_bases(i)) == the alpha value associated with sample x(i)
+ - weights(active_bases(i)) == the weight value associated with sample x(i)
+ - colm(phi, active_bases(i)) == the column of phi associated with sample x(i)
+ - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum())
+ - else
+ - the i'th sample isn't in the model and notionally has an alpha of infinity and
+ a weight of 0.
+ !*/
+ matrix<long,0,1,mem_manager_type> active_bases(x.nr());
+ scalar_matrix_type phi(x.nr(),1);
+ scalar_vector_type alpha(1), prev_alpha;
+ scalar_vector_type weights(1), prev_weights;
+
+ scalar_vector_type tempv, K_col;
+
+ // set the initial values of these guys
+ set_all_elements(active_bases, -1);
+ long first_basis = pick_initial_vector(x,t);
+ get_kernel_colum(first_basis, x, K_col);
+ active_bases(first_basis) = 0;
+ set_colm(phi,0) = K_col;
+ alpha(0) = compute_initial_alpha(phi, t);
+ weights(0) = 1;
+
+
+ // now declare a bunch of other variables we will be using below
+ scalar_vector_type mu, t_hat, Q, S;
+ scalar_matrix_type sigma;
+
+ matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3;
+ scalar_matrix_type tempm;
+
+ scalar_vector_type t_estimate;
+ scalar_vector_type beta;
+
+
+ Q.set_size(x.nr());
+ S.set_size(x.nr());
+
+ bool recompute_beta = true;
+
+ bool search_all_alphas = false;
+ unsigned long ticker = 0;
+ const unsigned long rounds_of_narrow_search = 100;
+ unsigned long iterations = 0;
+
+ while (iterations != max_iterations)
+ {
+ iterations++;
+ if (recompute_beta)
+ {
+ // calculate the current t_estimate. (this is the predicted t value for each sample according to the
+ // current state of the classifier)
+ t_estimate = phi*weights;
+
+ // calculate the current beta
+ beta = sigmoid(t_estimate);
+ beta = pointwise_multiply(beta,(uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta));
+ recompute_beta = false;
+ }
+
+ // Compute optimal weights and sigma for current alpha using IRLS. This is the same
+ // technique documented in the paper by equations 12-14.
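+                // In matrix form, with B = diag(beta) and A = diag(alpha), each
+                // IRLS step computes:
+                //     sigma = inv(trans(phi)*B*phi + A)
+                //     mu    = sigma*trans(phi)*B*t_hat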
+ scalar_type weight_delta = std::numeric_limits<scalar_type>::max();
+ int count = 0;
+ while (weight_delta > 0.0001)
+ {
+                    // This is a sanity check to make sure we never get stuck in this
+                    // loop due to some degenerate numerical condition
+ ++count;
+ if (count > 100)
+ {
+ // jump us to where search_all_alphas will be set to true
+ ticker = rounds_of_narrow_search;
+ break;
+ }
+
+ // compute the updated sigma matrix
+ sigma = scale_columns(trans(phi),beta)*phi;
+ for (long r = 0; r < alpha.nr(); ++r)
+ sigma(r,r) += alpha(r);
+ sigma = inv(sigma);
+
+
+ // compute the updated weights vector (t_hat = phi*mu_mp + inv(B)*(t-y))
+ t_hat = t_estimate + trans(scale_columns(trans(t-sigmoid(t_estimate)),reciprocal(beta)));
+
+ // mu = sigma*trans(phi)*b*t_hat
+ mu = sigma*tmp(trans(phi)* trans(scale_columns(trans(t_hat), beta)));
+
+ // now compute how much the weights vector changed during this iteration
+ // through this loop.
+ weight_delta = max(abs(mu-weights));
+
+ // put mu into the weights vector
+ mu.swap(weights);
+
+ // calculate the current t_estimate
+ t_estimate = phi*weights;
+
+ // calculate the current beta
+ beta = sigmoid(t_estimate);
+ beta = pointwise_multiply(beta, uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta);
+
+ }
+
+ // check if we should do a full search for the best alpha to optimize
+ if (ticker >= rounds_of_narrow_search)
+ {
+ // if the current alpha and weights are equal to what they were
+ // at the last time we were about to start a wide search then
+ // we are done.
+ if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps))
+ break;
+
+
+ prev_alpha = alpha;
+ prev_weights = weights;
+ search_all_alphas = true;
+ ticker = 0;
+ }
+ else
+ {
+ search_all_alphas = false;
+ }
+ ++ticker;
+
+ // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t_hat)
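+                // Written out, for each kernel column phi_m (K_col below) and B = diag(beta):
+                //     S(m) = trans(phi_m)*B*phi_m - trans(phi_m)*B*phi*sigma*trans(phi)*B*phi_m
+                //     Q(m) = trans(phi_m)*B*t_hat - trans(phi_m)*B*tempv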
+ tempv = phi*tmp(sigma*tmp(trans(phi)*trans(scale_columns(trans(t_hat),beta))));
+ for (long i = 0; i < S.size(); ++i)
+ {
+ // if we are currently limiting the search for the next alpha to update
+                    // to the vectors in the active set, then skip any non-active vector.
+ if (search_all_alphas == false && active_bases(i) == -1)
+ continue;
+
+ // get the column for this sample out of the kernel matrix. If it is
+ // something in the active set then just get it right out of phi, otherwise
+ // we have to calculate it.
+ if (active_bases(i) != -1)
+ K_col = colm(phi,active_bases(i));
+ else
+ get_kernel_colum(i, x, K_col);
+
+ // tempv2 = trans(phi_m)*B
+ tempv2 = scale_columns(trans(K_col), beta);
+ tempv3 = tempv2*phi;
+ S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3);
+ Q(i) = tempv2*t_hat - tempv2*tempv;
+ }
+
+ const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps);
+
+
+ // if find_next_best_alpha_to_update didn't find any good alpha to update
+ if (selected_idx == -1)
+ {
+ if (search_all_alphas == false)
+ {
+ // jump us to where search_all_alphas will be set to true and try again
+ ticker = rounds_of_narrow_search;
+ continue;
+ }
+ else
+ {
+ // we are really done so quit the main loop
+ break;
+ }
+ }
+
+
+ // next we update the selected alpha.
+
+ // if the selected alpha is in the active set
+ if (active_bases(selected_idx) >= 0)
+ {
+ const long idx = active_bases(selected_idx);
+ const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx));
+ const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx));
+
+ if (q*q-s > 0)
+ {
+ // reestimate the value of alpha
+ alpha(idx) = s*s/(q*q-s);
+
+ }
+ else
+ {
+ // the new alpha value is infinite so remove the selected alpha from our model
+ active_bases(selected_idx) = -1;
+ phi = remove_col(phi, idx);
+ weights = remove_row(weights, idx);
+ alpha = remove_row(alpha, idx);
+
+ // fix the index values in active_bases
+ for (long i = 0; i < active_bases.size(); ++i)
+ {
+ if (active_bases(i) > idx)
+ {
+ active_bases(i) -= 1;
+ }
+ }
+
+ // we changed the number of weights so we need to remember to
+ // recompute the beta vector next time around the main loop.
+ recompute_beta = true;
+ }
+ }
+ else
+ {
+ const scalar_type s = S(selected_idx);
+ const scalar_type q = Q(selected_idx);
+
+ if (q*q-s > 0)
+ {
+ // add the selected alpha to our model
+
+ active_bases(selected_idx) = phi.nc();
+
+ // update alpha
+ tempv.set_size(alpha.size()+1);
+ set_subm(tempv, get_rect(alpha)) = alpha;
+ tempv(phi.nc()) = s*s/(q*q-s);
+ tempv.swap(alpha);
+
+ // update weights
+ tempv.set_size(weights.size()+1);
+ set_subm(tempv, get_rect(weights)) = weights;
+ tempv(phi.nc()) = 0;
+ tempv.swap(weights);
+
+                        // update phi by adding the new sample's kernel matrix column as one of phi's columns
+ tempm.set_size(phi.nr(), phi.nc()+1);
+ set_subm(tempm, get_rect(phi)) = phi;
+ get_kernel_colum(selected_idx, x, K_col);
+ set_colm(tempm, phi.nc()) = K_col;
+ tempm.swap(phi);
+
+
+ // we changed the number of weights so we need to remember to
+ // recompute the beta vector next time around the main loop.
+ recompute_beta = true;
+ }
+ }
+
+            } // end of the main loop.  So we have either converged on the final answer or hit max_iterations.
+
+
+ // now put everything into a decision_function object and return it
+ std_vector_c<sample_type> dictionary;
+ std_vector_c<scalar_type> final_weights;
+ for (long i = 0; i < active_bases.size(); ++i)
+ {
+ if (active_bases(i) >= 0)
+ {
+ dictionary.push_back(x(i));
+ final_weights.push_back(weights(active_bases(i)));
+ }
+ }
+
+ return decision_function<kernel_type> ( mat(final_weights),
+ -sum(mat(final_weights))*tau,
+ kernel,
+ mat(dictionary));
+
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename M1, typename M2>
+ long pick_initial_vector (
+ const M1& x,
+ const M2& t
+ ) const
+ {
+ scalar_vector_type K_col;
+ double max_projection = -std::numeric_limits<scalar_type>::infinity();
+ long max_idx = 0;
+ // find the row in the kernel matrix that has the biggest normalized projection onto the t vector
+ for (long r = 0; r < x.nr(); ++r)
+ {
+ get_kernel_colum(r,x,K_col);
+ double temp = trans(K_col)*t;
+ temp = temp*temp/length_squared(K_col);
+
+ if (temp > max_projection)
+ {
+ max_projection = temp;
+ max_idx = r;
+ }
+ }
+
+ return max_idx;
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename T>
+ void get_kernel_colum (
+ long idx,
+ const T& x,
+ scalar_vector_type& col
+ ) const
+ {
+ col.set_size(x.nr());
+ for (long i = 0; i < col.size(); ++i)
+ {
+ col(i) = kernel(x(idx), x(i)) + tau;
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename M1, typename M2>
+ scalar_type compute_initial_alpha (
+ const M1& phi,
+ const M2& t
+ ) const
+ {
+ const double temp = length_squared(phi);
+ const double temp2 = trans(phi)*t;
+
+ return temp/( temp2*temp2/temp + variance(t)*0.1);
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ // private member variables
+ kernel_type kernel;
+ scalar_type eps;
+ unsigned long max_iterations;
+
+ const static scalar_type tau;
+
+ }; // end of class rvm_trainer
+
+ template <typename kernel_type>
+ const typename kernel_type::scalar_type rvm_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001);
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename K>
+ void swap (
+ rvm_trainer<K>& a,
+ rvm_trainer<K>& b
+ ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename kern_type
+ >
+ class rvm_regression_trainer
+ {
+ /*!
+ This is an implementation of the regression version of the
+ relevance vector machine algorithm described in the paper:
+ Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation
+ for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings
+ of the Ninth International Workshop on Artificial Intelligence and Statistics,
+ Key West, FL, Jan 3-6.
+
+ This code mostly does what is described in the above paper with the exception
+ that here we use a different stopping condition as well as a modified alpha
+ selection rule. See the code for the exact details.
+ !*/
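+
+        /*
+            A minimal usage sketch (the sample type, kernel, and gamma value here
+            are arbitrary illustrative choices; see rvm_abstract.h for the formal
+            interface specification):
+
+                typedef matrix<double,1,1> sample_type;
+                typedef radial_basis_kernel<sample_type> kernel_type;
+
+                rvm_regression_trainer<kernel_type> trainer;
+                trainer.set_kernel(kernel_type(0.1));
+
+                std::vector<sample_type> samples;
+                std::vector<double> targets;  // real-valued regression targets
+                // ... fill samples and targets ...
+
+                decision_function<kernel_type> df = trainer.train(samples, targets);
+                double y = df(samples[0]);   // predicted target value
+        */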
+
+ public:
+ typedef kern_type kernel_type;
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+ typedef decision_function<kernel_type> trained_function_type;
+
+ rvm_regression_trainer (
+ ) : eps(0.001)
+ {
+ }
+
+ void set_epsilon (
+ scalar_type eps_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(eps_ > 0,
+ "\tvoid rvm_regression_trainer::set_epsilon(eps_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t eps: " << eps_
+ );
+ eps = eps_;
+ }
+
+ const scalar_type get_epsilon (
+ ) const
+ {
+ return eps;
+ }
+
+ void set_kernel (
+ const kernel_type& k
+ )
+ {
+ kernel = k;
+ }
+
+ const kernel_type& get_kernel (
+ ) const
+ {
+ return kernel;
+ }
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& t
+ ) const
+ {
+ return do_train(mat(x), mat(t));
+ }
+
+ void swap (
+ rvm_regression_trainer& item
+ )
+ {
+ exchange(kernel, item.kernel);
+ exchange(eps, item.eps);
+ }
+
+ private:
+
+ // ------------------------------------------------------------------------------------
+
+ typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type;
+ typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type;
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> do_train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& t
+ ) const
+ {
+
+ // make sure requires clause is not broken
+ DLIB_ASSERT(is_learning_problem(x,t) && x.size() > 0,
+ "\tdecision_function rvm_regression_trainer::train(x,t)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t x.nr(): " << x.nr()
+ << "\n\t t.nr(): " << t.nr()
+ << "\n\t x.nc(): " << x.nc()
+ << "\n\t t.nc(): " << t.nc()
+ );
+
+
+ /*! This is the convention for the active_bases variable in the function:
+ - if (active_bases(i) >= 0) then
+ - alpha(active_bases(i)) == the alpha value associated with sample x(i)
+ - weights(active_bases(i)) == the weight value associated with sample x(i)
+ - colm(phi, active_bases(i)) == the column of phi associated with sample x(i)
+ - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum())
+ - else
+ - the i'th sample isn't in the model and notionally has an alpha of infinity and
+ a weight of 0.
+ !*/
+ matrix<long,0,1,mem_manager_type> active_bases(x.nr());
+ scalar_matrix_type phi(x.nr(),1);
+ scalar_vector_type alpha(1), prev_alpha;
+ scalar_vector_type weights(1), prev_weights;
+
+ scalar_vector_type tempv, K_col;
+ scalar_type var = variance(t)*0.1;
+
+ // set the initial values of these guys
+ set_all_elements(active_bases, -1);
+ long first_basis = pick_initial_vector(x,t);
+ get_kernel_colum(first_basis, x, K_col);
+ active_bases(first_basis) = 0;
+ set_colm(phi,0) = K_col;
+ alpha(0) = compute_initial_alpha(phi, t, var);
+ weights(0) = 1;
+
+
+ // now declare a bunch of other variables we will be using below
+ scalar_vector_type Q, S;
+ scalar_matrix_type sigma;
+
+ matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3;
+ scalar_matrix_type tempm;
+
+
+ Q.set_size(x.nr());
+ S.set_size(x.nr());
+
+
+ bool search_all_alphas = false;
+ unsigned long ticker = 0;
+ const unsigned long rounds_of_narrow_search = 100;
+
+ while (true)
+ {
+ // Compute optimal weights and sigma for current alpha using equation 6.
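+                // That is, with noise variance var:
+                //     sigma   = inv(trans(phi)*phi/var + diagm(alpha))
+                //     weights = sigma*trans(phi)*t/var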
+ sigma = trans(phi)*phi/var;
+ for (long r = 0; r < alpha.nr(); ++r)
+ sigma(r,r) += alpha(r);
+ sigma = inv(sigma);
+ weights = sigma*trans(phi)*t/var;
+
+
+
+ // check if we should do a full search for the best alpha to optimize
+ if (ticker == rounds_of_narrow_search)
+ {
+ // if the current alpha and weights are equal to what they were
+ // at the last time we were about to start a wide search then
+ // we are done.
+ if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps))
+ break;
+
+ prev_alpha = alpha;
+ prev_weights = weights;
+ search_all_alphas = true;
+ ticker = 0;
+ }
+ else
+ {
+ search_all_alphas = false;
+ }
+ ++ticker;
+
+ // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t)
+ tempv = phi*tmp(sigma*tmp(trans(phi)*t/var));
+ for (long i = 0; i < S.size(); ++i)
+ {
+ // if we are currently limiting the search for the next alpha to update
+                    // to the vectors in the active set, then skip any non-active vector.
+ if (search_all_alphas == false && active_bases(i) == -1)
+ continue;
+
+ // get the column for this sample out of the kernel matrix. If it is
+ // something in the active set then just get it right out of phi, otherwise
+ // we have to calculate it.
+ if (active_bases(i) != -1)
+ K_col = colm(phi,active_bases(i));
+ else
+ get_kernel_colum(i, x, K_col);
+
+ // tempv2 = trans(phi_m)*B
+ tempv2 = trans(K_col)/var;
+ tempv3 = tempv2*phi;
+ S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3);
+ Q(i) = tempv2*t - tempv2*tempv;
+ }
+
+ const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps);
+
+ // if find_next_best_alpha_to_update didn't find any good alpha to update
+ if (selected_idx == -1)
+ {
+ if (search_all_alphas == false)
+ {
+ // jump us to where search_all_alphas will be set to true and try again
+ ticker = rounds_of_narrow_search;
+ continue;
+ }
+ else
+ {
+ // we are really done so quit the main loop
+ break;
+ }
+ }
+
+ // recompute the variance
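+                    // (this is the usual noise variance update:
+                    //  var = length_squared(t - phi*weights)/(N - sum_i(1 - alpha(i)*sigma(i,i)))
+                    //  where sum_i(1 - alpha(i)*sigma(i,i)) = M - trans(alpha)*diag(sigma),
+                    //  giving the N - M + trans(alpha)*diag(sigma) denominator below)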
+ var = length_squared(t - phi*weights)/(x.nr() - weights.size() + trans(alpha)*diag(sigma));
+
+ // next we update the selected alpha.
+
+ // if the selected alpha is in the active set
+ if (active_bases(selected_idx) >= 0)
+ {
+ const long idx = active_bases(selected_idx);
+ const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx));
+ const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx));
+
+ if (q*q-s > 0)
+ {
+ // reestimate the value of alpha
+ alpha(idx) = s*s/(q*q-s);
+
+ }
+ else
+ {
+ // the new alpha value is infinite so remove the selected alpha from our model
+ active_bases(selected_idx) = -1;
+ phi = remove_col(phi, idx);
+ weights = remove_row(weights, idx);
+ alpha = remove_row(alpha, idx);
+
+ // fix the index values in active_bases
+ for (long i = 0; i < active_bases.size(); ++i)
+ {
+ if (active_bases(i) > idx)
+ {
+ active_bases(i) -= 1;
+ }
+ }
+ }
+ }
+ else
+ {
+ const scalar_type s = S(selected_idx);
+ const scalar_type q = Q(selected_idx);
+
+ if (q*q-s > 0)
+ {
+ // add the selected alpha to our model
+
+ active_bases(selected_idx) = phi.nc();
+
+ // update alpha
+ tempv.set_size(alpha.size()+1);
+ set_subm(tempv, get_rect(alpha)) = alpha;
+ tempv(phi.nc()) = s*s/(q*q-s);
+ tempv.swap(alpha);
+
+ // update weights
+ tempv.set_size(weights.size()+1);
+ set_subm(tempv, get_rect(weights)) = weights;
+ tempv(phi.nc()) = 0;
+ tempv.swap(weights);
+
+                        // update phi by adding the new sample's kernel matrix column as one of phi's columns
+ tempm.set_size(phi.nr(), phi.nc()+1);
+ set_subm(tempm, get_rect(phi)) = phi;
+ get_kernel_colum(selected_idx, x, K_col);
+ set_colm(tempm, phi.nc()) = K_col;
+ tempm.swap(phi);
+
+ }
+ }
+
+
+
+ } // end while(true). So we have converged on the final answer.
+
+
+ // now put everything into a decision_function object and return it
+ std_vector_c<sample_type> dictionary;
+ std_vector_c<scalar_type> final_weights;
+ for (long i = 0; i < active_bases.size(); ++i)
+ {
+ if (active_bases(i) >= 0)
+ {
+ dictionary.push_back(x(i));
+ final_weights.push_back(weights(active_bases(i)));
+ }
+ }
+
+ return decision_function<kernel_type> ( mat(final_weights),
+ -sum(mat(final_weights))*tau,
+ kernel,
+ mat(dictionary));
+
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename T>
+ void get_kernel_colum (
+ long idx,
+ const T& x,
+ scalar_vector_type& col
+ ) const
+ {
+ col.set_size(x.nr());
+ for (long i = 0; i < col.size(); ++i)
+ {
+ col(i) = kernel(x(idx), x(i)) + tau;
+ }
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename M1, typename M2>
+ scalar_type compute_initial_alpha (
+ const M1& phi,
+ const M2& t,
+ const scalar_type& var
+ ) const
+ {
+ const double temp = length_squared(phi);
+ const double temp2 = trans(phi)*t;
+
+ return temp/( temp2*temp2/temp + var);
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ template <typename M1, typename M2>
+ long pick_initial_vector (
+ const M1& x,
+ const M2& t
+ ) const
+ {
+ scalar_vector_type K_col;
+ double max_projection = -std::numeric_limits<scalar_type>::infinity();
+ long max_idx = 0;
+ // find the row in the kernel matrix that has the biggest normalized projection onto the t vector
+ for (long r = 0; r < x.nr(); ++r)
+ {
+ get_kernel_colum(r,x,K_col);
+ double temp = trans(K_col)*t;
+ temp = temp*temp/length_squared(K_col);
+
+ if (temp > max_projection)
+ {
+ max_projection = temp;
+ max_idx = r;
+ }
+ }
+
+ return max_idx;
+ }
+
+ // ------------------------------------------------------------------------------------
+
+ // private member variables
+ kernel_type kernel;
+ scalar_type eps;
+
+ const static scalar_type tau;
+
+ }; // end of class rvm_regression_trainer
+
+ template <typename kernel_type>
+ const typename kernel_type::scalar_type rvm_regression_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001);
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename K>
+ void swap (
+ rvm_regression_trainer<K>& a,
+ rvm_regression_trainer<K>& b
+ ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_RVm_
+
+