1 files changed, 710 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/pegasos.h b/ml/dlib/dlib/svm/pegasos.h
new file mode 100644
index 000000000..c28093fe0
--- /dev/null
+++ b/ml/dlib/dlib/svm/pegasos.h
@@ -0,0 +1,710 @@
+// Copyright (C) 2009  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_PEGASoS_
+#define DLIB_PEGASoS_
+
+#include "pegasos_abstract.h"
+#include <cmath>
+#include "../algs.h"
+#include "function.h"
+#include "kernel.h"
+#include "kcentroid.h"
+#include <iostream>
+#include <memory>
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename K 
+        >
+    class svm_pegasos
+    {
+        typedef kcentroid<offset_kernel<K> > kc_type;
+
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        template <typename K_>
+        struct rebind {
+            typedef svm_pegasos<K_> other;
+        };
+
+        svm_pegasos (
+        ) :
+            max_sv(40),
+            lambda_c1(0.0001),
+            lambda_c2(0.0001),
+            tau(0.01),
+            tolerance(0.01),
+            train_count(0),
+            w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false)
+        {
+        }
+
+        svm_pegasos (
+            const kernel_type& kernel_, 
+            const scalar_type& lambda_,
+            const scalar_type& tolerance_,
+            unsigned long max_num_sv
+        ) :
+            max_sv(max_num_sv),
+            kernel(kernel_),
+            lambda_c1(lambda_),
+            lambda_c2(lambda_),
+            tau(0.01),
+            tolerance(tolerance_),
+            train_count(0),
+            w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false)
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(lambda_ > 0 && tolerance > 0 && max_num_sv > 0,
+                        "\tsvm_pegasos::svm_pegasos(kernel,lambda,tolerance)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t lambda_: " << lambda_ 
+                        << "\n\t max_num_sv: " << max_num_sv 
+            );
+        }
+
+        void clear (
+        )
+        {
+            // reset the w vector back to its initial state
+            w = kc_type(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false);
+            train_count = 0;
+        }
+
+        void set_kernel (
+            kernel_type k
+        )
+        {
+            kernel = k;
+            clear();
+        }
+
+        void set_max_num_sv (
+            unsigned long max_num_sv
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(max_num_sv > 0,
+                        "\tvoid svm_pegasos::set_max_num_sv(max_num_sv)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t max_num_sv: " << max_num_sv 
+            );
+            max_sv = max_num_sv; 
+            clear();
+        }
+
+        unsigned long get_max_num_sv (
+        ) const
+        {
+            return max_sv;
+        }
+
+        void set_tolerance (
+            double tol
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < tol,
+                        "\tvoid svm_pegasos::set_tolerance(tol)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t tol: " << tol 
+            );
+            tolerance = tol;
+            clear();
+        }
+
+        void set_lambda (
+            scalar_type lambda_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < lambda_,
+                        "\tvoid svm_pegasos::set_lambda(lambda_)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t lambda_: " << lambda_ 
+            );
+            lambda_c1 = lambda_;
+            lambda_c2 = lambda_;
+
+            max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2));
+            clear();
+        }
+
+        void set_lambda_class1 (
+            scalar_type lambda_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < lambda_,
+                        "\tvoid svm_pegasos::set_lambda_class1(lambda_)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t lambda_: " << lambda_ 
+            );
+            lambda_c1 = lambda_;
+            max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2));
+            clear();
+        }
+
+        void set_lambda_class2 (
+            scalar_type lambda_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < lambda_,
+                        "\tvoid svm_pegasos::set_lambda_class2(lambda_)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t lambda_: " << lambda_ 
+            );
+            lambda_c2 = lambda_;
+            max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2));
+            clear();
+        }
+
+        const scalar_type get_lambda_class1 (
+        ) const
+        {
+            return lambda_c1;
+        }
+
+        const scalar_type get_lambda_class2 (
+        ) const
+        {
+            return lambda_c2;
+        }
+
+        const scalar_type get_tolerance (
+        ) const
+        {
+            return tolerance;
+        }
+
+        const kernel_type get_kernel (
+        ) const
+        {
+            return kernel;
+        }
+
+        unsigned long get_train_count (
+        ) const
+        {
+            return static_cast<unsigned long>(train_count);
+        }
+
+        scalar_type train (
+            const sample_type& x,
+            const scalar_type& y
+        ) 
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(y == -1 || y == 1,
+                        "\tscalar_type svm_pegasos::train(x,y)"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t y: " << y
+            );
+
+            const double lambda = (y==+1)? lambda_c1 : lambda_c2;
+
+            ++train_count;
+            const scalar_type learning_rate = 1/(lambda*train_count);
+
+            // if this sample point is within the margin of the current hyperplane
+            if (y*w.inner_product(x) < 1)
+            {
+
+                // compute: w = (1-learning_rate*lambda)*w + y*learning_rate*x
+                w.train(x,  1 - learning_rate*lambda,  y*learning_rate);
+
+                scalar_type wnorm = std::sqrt(w.squared_norm());
+                scalar_type temp = max_wnorm/wnorm;
+                if (temp < 1)
+                    w.scale_by(temp);
+            }
+            else
+            {
+                w.scale_by(1 - learning_rate*lambda);
+            }
+
+            // return the current learning rate
+            return 1/(std::min(lambda_c1,lambda_c2)*train_count);
+        }
+
+        scalar_type operator() (
+            const sample_type& x
+        ) const
+        {
+            return w.inner_product(x);
+        }
+
+        const decision_function<kernel_type> get_decision_function (
+        ) const
+        {
+            distance_function<offset_kernel<kernel_type> > df = w.get_distance_function();
+            return decision_function<kernel_type>(df.get_alpha(), -tau*sum(df.get_alpha()), kernel, df.get_basis_vectors());
+        }
+
+        void swap (
+            svm_pegasos& item
+        )
+        {
+            exchange(max_sv,         item.max_sv);
+            exchange(kernel,         item.kernel);
+            exchange(lambda_c1,      item.lambda_c1);
+            exchange(lambda_c2,      item.lambda_c2);
+            exchange(max_wnorm,      item.max_wnorm);
+            exchange(tau,            item.tau);
+            exchange(tolerance,      item.tolerance);
+            exchange(train_count,    item.train_count);
+            exchange(w,              item.w);
+        }
+
+        friend void serialize(const svm_pegasos& item, std::ostream& out)
+        {
+            serialize(item.max_sv, out);
+            serialize(item.kernel, out);
+            serialize(item.lambda_c1, out);
+            serialize(item.lambda_c2, out);
+            serialize(item.max_wnorm, out);
+            serialize(item.tau, out);
+            serialize(item.tolerance, out);
+            serialize(item.train_count, out);
+            serialize(item.w, out);
+        }
+
+        friend void deserialize(svm_pegasos& item, std::istream& in)
+        {
+            deserialize(item.max_sv, in);
+            deserialize(item.kernel, in);
+            deserialize(item.lambda_c1, in);
+            deserialize(item.lambda_c2, in);
+            deserialize(item.max_wnorm, in);
+            deserialize(item.tau, in);
+            deserialize(item.tolerance, in);
+            deserialize(item.train_count, in);
+            deserialize(item.w, in);
+        }
+
+    private:
+
+        unsigned long max_sv;
+        kernel_type kernel;
+        scalar_type lambda_c1;
+        scalar_type lambda_c2;
+        scalar_type max_wnorm;
+        scalar_type tau;
+        scalar_type tolerance;
+        scalar_type train_count;
+        kc_type w;
+
+    }; // end of class svm_pegasos
+
+    template <
+        typename K 
+        >
+    void swap (
+        svm_pegasos<K>& a,
+        svm_pegasos<K>& b
+    ) { a.swap(b); }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename U
+        >
+    void replicate_settings (
+        const svm_pegasos<T>& source,
+        svm_pegasos<U>& dest
+    )
+    {
+        dest.set_tolerance(source.get_tolerance());
+        dest.set_lambda_class1(source.get_lambda_class1());
+        dest.set_lambda_class2(source.get_lambda_class2());
+        dest.set_max_num_sv(source.get_max_num_sv());
+    }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    class batch_trainer 
+    {
+
+    // ------------------------------------------------------------------------------------
+
+        template <
+            typename K,
+            typename sample_vector_type
+            >
+        class caching_kernel 
+        {
+        public:
+            typedef typename K::scalar_type scalar_type;
+            typedef long sample_type;
+            //typedef typename K::sample_type sample_type;
+            typedef typename K::mem_manager_type mem_manager_type;
+
+            caching_kernel () {}
+
+            caching_kernel (
+                const K& kern,
+                const sample_vector_type& samps,
+                long cache_size_
+            ) : real_kernel(kern), samples(&samps), counter(0)  
+            {
+                cache_size = std::min<long>(cache_size_, samps.size());
+
+                cache.reset(new cache_type);
+                cache->frequency_of_use.resize(samps.size());
+                for (long i = 0; i < samps.size(); ++i)
+                    cache->frequency_of_use[i] = std::make_pair(0, i);
+
+                // Set the cache build/rebuild threshold so that we have to have
+                // as many cache misses as there are entries in the cache before
+                // we build/rebuild.
+                counter_threshold = samps.size()*cache_size;
+                cache->sample_location.assign(samples->size(), -1);
+            }
+
+            scalar_type operator() (
+                const sample_type& a,
+                const sample_type& b
+            )  const
+            { 
+                // rebuild the cache every so often
+                if (counter > counter_threshold )
+                {
+                    build_cache();
+                }
+
+                const long a_loc = cache->sample_location[a];
+                const long b_loc = cache->sample_location[b];
+
+                cache->frequency_of_use[a].first += 1;
+                cache->frequency_of_use[b].first += 1;
+
+                if (a_loc != -1)
+                {
+                    return cache->kernel(a_loc, b);
+                }
+                else if (b_loc != -1)
+                {
+                    return cache->kernel(b_loc, a);
+                }
+                else
+                {
+                    ++counter;
+                    return real_kernel((*samples)(a), (*samples)(b));
+                }
+            }
+
+            bool operator== (
+                const caching_kernel& item
+            ) const
+            {
+                return item.real_kernel == real_kernel &&
+                    item.samples == samples;
+            }
+
+        private:
+            K real_kernel;
+
+            void build_cache (
+            ) const
+            {
+                std::sort(cache->frequency_of_use.rbegin(), cache->frequency_of_use.rend());
+                counter = 0;
+
+
+                cache->kernel.set_size(cache_size, samples->size());
+                cache->sample_location.assign(samples->size(), -1);
+
+                // loop over all the samples in the cache
+                for (long i = 0; i < cache_size; ++i)
+                {
+                    const long cur = cache->frequency_of_use[i].second;
+                    cache->sample_location[cur] = i;
+
+                    // now populate all possible kernel products with the current sample
+                    for (long j = 0; j < samples->size(); ++j)
+                    {
+                        cache->kernel(i, j) = real_kernel((*samples)(cur), (*samples)(j));
+                    }
+
+                }
+
+                // reset the frequency of use metrics
+                for (long i = 0; i < samples->size(); ++i)
+                    cache->frequency_of_use[i] = std::make_pair(0, i);
+            }
+
+
+            struct cache_type
+            {
+                matrix<scalar_type> kernel;  
+
+                std::vector<long> sample_location; // where in the cache a sample is.  -1 means not in cache
+                std::vector<std::pair<long,long> > frequency_of_use;  
+            };
+
+            const sample_vector_type* samples = 0;
+
+            std::shared_ptr<cache_type> cache;
+            mutable unsigned long counter = 0;
+            unsigned long counter_threshold = 0;
+            long cache_size = 0;
+        };
+
+    // ------------------------------------------------------------------------------------
+
+    public:
+        typedef typename trainer_type::kernel_type kernel_type;
+        typedef typename trainer_type::scalar_type scalar_type;
+        typedef typename trainer_type::sample_type sample_type;
+        typedef typename trainer_type::mem_manager_type mem_manager_type;
+        typedef typename trainer_type::trained_function_type trained_function_type;
+
+
+        batch_trainer (
+        ) :
+            min_learning_rate(0.1),
+            use_cache(false),
+            cache_size(100)
+        {
+        }
+
+        batch_trainer (
+            const trainer_type& trainer_, 
+            const scalar_type min_learning_rate_,
+            bool verbose_,
+            bool use_cache_,
+            long cache_size_ = 100
+        ) :
+            trainer(trainer_),
+            min_learning_rate(min_learning_rate_),
+            verbose(verbose_),
+            use_cache(use_cache_),
+            cache_size(cache_size_)
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < min_learning_rate_ &&
+                        cache_size_ > 0,
+                        "\tbatch_trainer::batch_trainer()"
+                        << "\n\t invalid inputs were given to this function"
+                        << "\n\t min_learning_rate_: " << min_learning_rate_ 
+                        << "\n\t cache_size_: " << cache_size_ 
+            );
+            
+            trainer.clear();
+        }
+
+        const scalar_type get_min_learning_rate (
+        ) const 
+        {
+            return min_learning_rate;
+        }
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        {
+            if (use_cache)
+                return do_train_cached(mat(x), mat(y));
+            else
+                return do_train(mat(x), mat(y));
+        }
+
+    private:
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> do_train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        {
+
+            dlib::rand rnd;
+
+            trainer_type my_trainer(trainer);
+
+            scalar_type cur_learning_rate = min_learning_rate + 10;
+            unsigned long count = 0;
+
+            while (cur_learning_rate > min_learning_rate)
+            {
+                const long i = rnd.get_random_32bit_number()%x.size();
+                // keep feeding the trainer data until its learning rate goes below our threshold
+                cur_learning_rate = my_trainer.train(x(i), y(i));
+
+                if (verbose)
+                {
+                    if ( (count&0x7FF) == 0)
+                    {
+                        std::cout << "\rbatch_trainer(): Percent complete: " 
+                                  << 100*min_learning_rate/cur_learning_rate << "             " << std::flush;
+                    }
+                    ++count;
+                }
+            }
+
+            if (verbose)
+            {
+                decision_function<kernel_type> df = my_trainer.get_decision_function();
+                std::cout << "\rbatch_trainer(): Percent complete: 100           " << std::endl;
+                std::cout << "    Num sv: " << df.basis_vectors.size() << std::endl;
+                std::cout << "    bias:   " << df.b << std::endl;
+                return df;
+            }
+            else
+            {
+                return my_trainer.get_decision_function();
+            }
+        }
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> do_train_cached (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y
+        ) const
+        {
+
+            dlib::rand rnd;
+
+            // make a caching kernel
+            typedef caching_kernel<kernel_type, in_sample_vector_type> ckernel_type;
+            ckernel_type ck(trainer.get_kernel(), x, cache_size);
+
+            // now rebind the trainer to use the caching kernel
+            typedef typename trainer_type::template rebind<ckernel_type>::other rebound_trainer_type;
+            rebound_trainer_type my_trainer;
+            my_trainer.set_kernel(ck);
+            replicate_settings(trainer, my_trainer);
+
+            scalar_type cur_learning_rate = min_learning_rate + 10;
+            unsigned long count = 0;
+
+            while (cur_learning_rate > min_learning_rate)
+            {
+                const long i = rnd.get_random_32bit_number()%x.size();
+                // keep feeding the trainer data until its learning rate goes below our threshold
+                cur_learning_rate = my_trainer.train(i, y(i));
+
+                if (verbose)
+                {
+                    if ( (count&0x7FF) == 0)
+                    {
+                        std::cout << "\rbatch_trainer(): Percent complete: " 
+                                  << 100*min_learning_rate/cur_learning_rate << "             " << std::flush;
+                    }
+                    ++count;
+                }
+            }
+
+            if (verbose)
+            {
+                decision_function<ckernel_type> cached_df;
+                cached_df = my_trainer.get_decision_function();
+
+                std::cout << "\rbatch_trainer(): Percent complete: 100           " << std::endl;
+                std::cout << "    Num sv: " << cached_df.basis_vectors.size() << std::endl;
+                std::cout << "    bias:   " << cached_df.b << std::endl;
+
+                return decision_function<kernel_type> (
+                        cached_df.alpha,
+                        cached_df.b,
+                        trainer.get_kernel(),
+                        rowm(x, cached_df.basis_vectors)
+                        );
+            }
+            else
+            {
+                decision_function<ckernel_type> cached_df;
+                cached_df = my_trainer.get_decision_function();
+
+                return decision_function<kernel_type> (
+                        cached_df.alpha,
+                        cached_df.b,
+                        trainer.get_kernel(),
+                        rowm(x, cached_df.basis_vectors)
+                        );
+            }
+        }
+
+        trainer_type trainer;
+        scalar_type min_learning_rate;
+        bool verbose;
+        bool use_cache;
+        long cache_size;
+
+    }; // end of class batch_trainer
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    const batch_trainer<trainer_type> batch (
+        const trainer_type& trainer,
+        const typename trainer_type::scalar_type min_learning_rate = 0.1
+    ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    const batch_trainer<trainer_type> verbose_batch (
+        const trainer_type& trainer,
+        const typename trainer_type::scalar_type min_learning_rate = 0.1
+    ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    const batch_trainer<trainer_type> batch_cached (
+        const trainer_type& trainer,
+        const typename trainer_type::scalar_type min_learning_rate = 0.1,
+        long cache_size = 100
+    ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); }
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    const batch_trainer<trainer_type> verbose_batch_cached (
+        const trainer_type& trainer,
+        const typename trainer_type::scalar_type min_learning_rate = 0.1,
+        long cache_size = 100
+    ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_PEGASoS_
+