Diffstat:
-rw-r--r--  ml/dlib/dlib/svm/pegasos.h  710
1 file changed, 710 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/pegasos.h b/ml/dlib/dlib/svm/pegasos.h
new file mode 100644
index 000000000..c28093fe0
--- /dev/null
+++ b/ml/dlib/dlib/svm/pegasos.h
@@ -0,0 +1,710 @@
+// Copyright (C) 2009 Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_PEGASoS_
+#define DLIB_PEGASoS_
+
+#include "pegasos_abstract.h"
+#include <cmath>
+#include "../algs.h"
+#include "function.h"
+#include "kernel.h"
+#include "kcentroid.h"
+#include <iostream>
+#include <memory>
+
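+// This file implements the kernelized Pegasos SVM trainer of Shalev-Shwartz,
+// Singer, and Srebro, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM"
+// (ICML 2007), plus a batch_trainer adapter that drives the online trainer to
+// convergence and can optionally cache kernel evaluations.
+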
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename K
+ >
+ class svm_pegasos
+ {
+ typedef kcentroid<offset_kernel<K> > kc_type;
+
+ public:
+ typedef K kernel_type;
+ typedef typename kernel_type::scalar_type scalar_type;
+ typedef typename kernel_type::sample_type sample_type;
+ typedef typename kernel_type::mem_manager_type mem_manager_type;
+ typedef decision_function<kernel_type> trained_function_type;
+
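+        // rebind allows other code (e.g. the kernel cache in batch_trainer below)
+        // to obtain a version of this trainer that uses a different kernel type.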
+ template <typename K_>
+ struct rebind {
+ typedef svm_pegasos<K_> other;
+ };
+
+ svm_pegasos (
+ ) :
+ max_sv(40),
+ lambda_c1(0.0001),
+            lambda_c2(0.0001),
+            max_wnorm(1/std::sqrt(lambda_c1)),
+            tau(0.01),
+ tolerance(0.01),
+ train_count(0),
+ w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false)
+ {
+ }
+
+ svm_pegasos (
+ const kernel_type& kernel_,
+ const scalar_type& lambda_,
+ const scalar_type& tolerance_,
+ unsigned long max_num_sv
+ ) :
+ max_sv(max_num_sv),
+ kernel(kernel_),
+ lambda_c1(lambda_),
+            lambda_c2(lambda_),
+            max_wnorm(1/std::sqrt(lambda_)),
+            tau(0.01),
+ tolerance(tolerance_),
+ train_count(0),
+ w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false)
+ {
+ // make sure requires clause is not broken
+            DLIB_ASSERT(lambda_ > 0 && tolerance_ > 0 && max_num_sv > 0,
+                "\tsvm_pegasos::svm_pegasos(kernel,lambda,tolerance,max_num_sv)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t lambda_: " << lambda_
+                << "\n\t tolerance_: " << tolerance_
+                << "\n\t max_num_sv: " << max_num_sv
+                );
+ }
+
+ void clear (
+ )
+ {
+ // reset the w vector back to its initial state
+ w = kc_type(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false);
+ train_count = 0;
+ }
+
+ void set_kernel (
+ kernel_type k
+ )
+ {
+ kernel = k;
+ clear();
+ }
+
+ void set_max_num_sv (
+ unsigned long max_num_sv
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(max_num_sv > 0,
+ "\tvoid svm_pegasos::set_max_num_sv(max_num_sv)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t max_num_sv: " << max_num_sv
+ );
+ max_sv = max_num_sv;
+ clear();
+ }
+
+ unsigned long get_max_num_sv (
+ ) const
+ {
+ return max_sv;
+ }
+
+ void set_tolerance (
+ double tol
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(0 < tol,
+ "\tvoid svm_pegasos::set_tolerance(tol)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t tol: " << tol
+ );
+ tolerance = tol;
+ clear();
+ }
+
+ void set_lambda (
+ scalar_type lambda_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(0 < lambda_,
+ "\tvoid svm_pegasos::set_lambda(lambda_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t lambda_: " << lambda_
+ );
+ lambda_c1 = lambda_;
+ lambda_c2 = lambda_;
+
+ max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2));
+ clear();
+ }
+
+ void set_lambda_class1 (
+ scalar_type lambda_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(0 < lambda_,
+ "\tvoid svm_pegasos::set_lambda_class1(lambda_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t lambda_: " << lambda_
+ );
+ lambda_c1 = lambda_;
+ max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2));
+ clear();
+ }
+
+ void set_lambda_class2 (
+ scalar_type lambda_
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(0 < lambda_,
+ "\tvoid svm_pegasos::set_lambda_class2(lambda_)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t lambda_: " << lambda_
+ );
+ lambda_c2 = lambda_;
+ max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2));
+ clear();
+ }
+
+ const scalar_type get_lambda_class1 (
+ ) const
+ {
+ return lambda_c1;
+ }
+
+ const scalar_type get_lambda_class2 (
+ ) const
+ {
+ return lambda_c2;
+ }
+
+ const scalar_type get_tolerance (
+ ) const
+ {
+ return tolerance;
+ }
+
+ const kernel_type get_kernel (
+ ) const
+ {
+ return kernel;
+ }
+
+ unsigned long get_train_count (
+ ) const
+ {
+ return static_cast<unsigned long>(train_count);
+ }
+
+ scalar_type train (
+ const sample_type& x,
+ const scalar_type& y
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(y == -1 || y == 1,
+ "\tscalar_type svm_pegasos::train(x,y)"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t y: " << y
+ );
+
+ const double lambda = (y==+1)? lambda_c1 : lambda_c2;
+
+ ++train_count;
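+            // the standard Pegasos step size: eta_t = 1/(lambda*t)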
+ const scalar_type learning_rate = 1/(lambda*train_count);
+
+ // if this sample point is within the margin of the current hyperplane
+ if (y*w.inner_product(x) < 1)
+ {
+
+ // compute: w = (1-learning_rate*lambda)*w + y*learning_rate*x
+ w.train(x, 1 - learning_rate*lambda, y*learning_rate);
+
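+                // Project w back onto the ball of radius 1/sqrt(lambda).  The
+                // Pegasos analysis shows the optimal w lies inside this ball, so
+                // the projection can only move us closer to the optimum.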
+ scalar_type wnorm = std::sqrt(w.squared_norm());
+ scalar_type temp = max_wnorm/wnorm;
+ if (temp < 1)
+ w.scale_by(temp);
+ }
+ else
+ {
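+                // the sample is outside the margin, so only the regularization part
+                // of the subgradient applies: w = (1 - learning_rate*lambda)*w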
+ w.scale_by(1 - learning_rate*lambda);
+ }
+
+            // return the current learning rate (computed with the smaller of the
+            // two per-class lambdas, i.e. the larger of the two learning rates)
+ return 1/(std::min(lambda_c1,lambda_c2)*train_count);
+ }
+
+ scalar_type operator() (
+ const sample_type& x
+ ) const
+ {
+ return w.inner_product(x);
+ }
+
+ const decision_function<kernel_type> get_decision_function (
+ ) const
+ {
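+            // w was trained with an offset_kernel, which adds tau to every kernel
+            // evaluation.  That implicit tau*sum(alpha) term is the bias, so we fold
+            // it into the b term of the returned decision_function (which subtracts b).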
+ distance_function<offset_kernel<kernel_type> > df = w.get_distance_function();
+ return decision_function<kernel_type>(df.get_alpha(), -tau*sum(df.get_alpha()), kernel, df.get_basis_vectors());
+ }
+
+ void swap (
+ svm_pegasos& item
+ )
+ {
+ exchange(max_sv, item.max_sv);
+ exchange(kernel, item.kernel);
+ exchange(lambda_c1, item.lambda_c1);
+ exchange(lambda_c2, item.lambda_c2);
+ exchange(max_wnorm, item.max_wnorm);
+ exchange(tau, item.tau);
+ exchange(tolerance, item.tolerance);
+ exchange(train_count, item.train_count);
+ exchange(w, item.w);
+ }
+
+ friend void serialize(const svm_pegasos& item, std::ostream& out)
+ {
+ serialize(item.max_sv, out);
+ serialize(item.kernel, out);
+ serialize(item.lambda_c1, out);
+ serialize(item.lambda_c2, out);
+ serialize(item.max_wnorm, out);
+ serialize(item.tau, out);
+ serialize(item.tolerance, out);
+ serialize(item.train_count, out);
+ serialize(item.w, out);
+ }
+
+ friend void deserialize(svm_pegasos& item, std::istream& in)
+ {
+ deserialize(item.max_sv, in);
+ deserialize(item.kernel, in);
+ deserialize(item.lambda_c1, in);
+ deserialize(item.lambda_c2, in);
+ deserialize(item.max_wnorm, in);
+ deserialize(item.tau, in);
+ deserialize(item.tolerance, in);
+ deserialize(item.train_count, in);
+ deserialize(item.w, in);
+ }
+
+ private:
+
+ unsigned long max_sv;
+ kernel_type kernel;
+ scalar_type lambda_c1;
+ scalar_type lambda_c2;
+ scalar_type max_wnorm;
+ scalar_type tau;
+ scalar_type tolerance;
+        scalar_type train_count; // kept as scalar_type so 1/(lambda*train_count) is a floating point divide
+ kc_type w;
+
+ }; // end of class svm_pegasos
+
+ template <
+ typename K
+ >
+ void swap (
+ svm_pegasos<K>& a,
+ svm_pegasos<K>& b
+ ) { a.swap(b); }
+
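+    /*
+        A minimal usage sketch (illustrative only: the sample type, kernel
+        parameters, and data below are assumptions, not part of this header):
+
+            typedef matrix<double,2,1> sample_type;
+            typedef radial_basis_kernel<sample_type> kernel_type;
+
+            svm_pegasos<kernel_type> trainer;
+            trainer.set_kernel(kernel_type(0.5));
+            trainer.set_lambda(0.0001);
+
+            // online: feed in labeled samples one at a time (labels must be +1 or -1)
+            trainer.train(samp, +1);
+
+            // batch: run to convergence using one of the adapters defined below
+            decision_function<kernel_type> df = batch(trainer, 0.1).train(samples, labels);
+    */
+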
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename T,
+ typename U
+ >
+ void replicate_settings (
+ const svm_pegasos<T>& source,
+ svm_pegasos<U>& dest
+ )
+ {
+ dest.set_tolerance(source.get_tolerance());
+ dest.set_lambda_class1(source.get_lambda_class1());
+ dest.set_lambda_class2(source.get_lambda_class2());
+ dest.set_max_num_sv(source.get_max_num_sv());
+ }
+
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ class batch_trainer
+ {
+
+ // ------------------------------------------------------------------------------------
+
+ template <
+ typename K,
+ typename sample_vector_type
+ >
+ class caching_kernel
+ {
+ public:
+ typedef typename K::scalar_type scalar_type;
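+            // Note that this kernel operates on sample *indices* rather than on the
+            // samples themselves, which is what allows kernel products to be looked
+            // up in the cache below.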
+ typedef long sample_type;
+ //typedef typename K::sample_type sample_type;
+ typedef typename K::mem_manager_type mem_manager_type;
+
+ caching_kernel () {}
+
+ caching_kernel (
+ const K& kern,
+ const sample_vector_type& samps,
+ long cache_size_
+ ) : real_kernel(kern), samples(&samps), counter(0)
+ {
+ cache_size = std::min<long>(cache_size_, samps.size());
+
+ cache.reset(new cache_type);
+ cache->frequency_of_use.resize(samps.size());
+ for (long i = 0; i < samps.size(); ++i)
+ cache->frequency_of_use[i] = std::make_pair(0, i);
+
+ // Set the cache build/rebuild threshold so that we have to have
+ // as many cache misses as there are entries in the cache before
+ // we build/rebuild.
+ counter_threshold = samps.size()*cache_size;
+ cache->sample_location.assign(samples->size(), -1);
+ }
+
+ scalar_type operator() (
+ const sample_type& a,
+ const sample_type& b
+ ) const
+ {
+ // rebuild the cache every so often
+                if (counter > counter_threshold)
+ {
+ build_cache();
+ }
+
+ const long a_loc = cache->sample_location[a];
+ const long b_loc = cache->sample_location[b];
+
+ cache->frequency_of_use[a].first += 1;
+ cache->frequency_of_use[b].first += 1;
+
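+                // If either sample owns a row of the cache, read the product out of
+                // that row; otherwise evaluate the real kernel and count the miss.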
+ if (a_loc != -1)
+ {
+ return cache->kernel(a_loc, b);
+ }
+ else if (b_loc != -1)
+ {
+ return cache->kernel(b_loc, a);
+ }
+ else
+ {
+ ++counter;
+ return real_kernel((*samples)(a), (*samples)(b));
+ }
+ }
+
+ bool operator== (
+ const caching_kernel& item
+ ) const
+ {
+ return item.real_kernel == real_kernel &&
+ item.samples == samples;
+ }
+
+ private:
+ K real_kernel;
+
+ void build_cache (
+ ) const
+ {
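+                // Sort descending by use count so that the most frequently used
+                // samples claim the cache_size rows of the rebuilt cache.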
+ std::sort(cache->frequency_of_use.rbegin(), cache->frequency_of_use.rend());
+ counter = 0;
+
+
+ cache->kernel.set_size(cache_size, samples->size());
+ cache->sample_location.assign(samples->size(), -1);
+
+ // loop over all the samples in the cache
+ for (long i = 0; i < cache_size; ++i)
+ {
+ const long cur = cache->frequency_of_use[i].second;
+ cache->sample_location[cur] = i;
+
+ // now populate all possible kernel products with the current sample
+ for (long j = 0; j < samples->size(); ++j)
+ {
+ cache->kernel(i, j) = real_kernel((*samples)(cur), (*samples)(j));
+ }
+
+ }
+
+ // reset the frequency of use metrics
+ for (long i = 0; i < samples->size(); ++i)
+ cache->frequency_of_use[i] = std::make_pair(0, i);
+ }
+
+
+ struct cache_type
+ {
+ matrix<scalar_type> kernel;
+
+ std::vector<long> sample_location; // where in the cache a sample is. -1 means not in cache
+ std::vector<std::pair<long,long> > frequency_of_use;
+ };
+
+            const sample_vector_type* samples = nullptr;
+
+ std::shared_ptr<cache_type> cache;
+ mutable unsigned long counter = 0;
+ unsigned long counter_threshold = 0;
+ long cache_size = 0;
+ };
+
+ // ------------------------------------------------------------------------------------
+
+ public:
+ typedef typename trainer_type::kernel_type kernel_type;
+ typedef typename trainer_type::scalar_type scalar_type;
+ typedef typename trainer_type::sample_type sample_type;
+ typedef typename trainer_type::mem_manager_type mem_manager_type;
+ typedef typename trainer_type::trained_function_type trained_function_type;
+
+
+ batch_trainer (
+ ) :
+ min_learning_rate(0.1),
+ use_cache(false),
+ cache_size(100)
+ {
+ }
+
+ batch_trainer (
+ const trainer_type& trainer_,
+ const scalar_type min_learning_rate_,
+ bool verbose_,
+ bool use_cache_,
+ long cache_size_ = 100
+ ) :
+ trainer(trainer_),
+ min_learning_rate(min_learning_rate_),
+ verbose(verbose_),
+ use_cache(use_cache_),
+ cache_size(cache_size_)
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(0 < min_learning_rate_ &&
+ cache_size_ > 0,
+ "\tbatch_trainer::batch_trainer()"
+ << "\n\t invalid inputs were given to this function"
+ << "\n\t min_learning_rate_: " << min_learning_rate_
+ << "\n\t cache_size_: " << cache_size_
+ );
+
+ trainer.clear();
+ }
+
+ const scalar_type get_min_learning_rate (
+ ) const
+ {
+ return min_learning_rate;
+ }
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+ if (use_cache)
+ return do_train_cached(mat(x), mat(y));
+ else
+ return do_train(mat(x), mat(y));
+ }
+
+ private:
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> do_train (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+
+ dlib::rand rnd;
+
+ trainer_type my_trainer(trainer);
+
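+            // start above the threshold so the training loop runs at least once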
+ scalar_type cur_learning_rate = min_learning_rate + 10;
+ unsigned long count = 0;
+
+ while (cur_learning_rate > min_learning_rate)
+ {
+ const long i = rnd.get_random_32bit_number()%x.size();
+ // keep feeding the trainer data until its learning rate goes below our threshold
+ cur_learning_rate = my_trainer.train(x(i), y(i));
+
+ if (verbose)
+ {
+ if ( (count&0x7FF) == 0)
+ {
+ std::cout << "\rbatch_trainer(): Percent complete: "
+ << 100*min_learning_rate/cur_learning_rate << " " << std::flush;
+ }
+ ++count;
+ }
+ }
+
+ if (verbose)
+ {
+ decision_function<kernel_type> df = my_trainer.get_decision_function();
+ std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl;
+ std::cout << " Num sv: " << df.basis_vectors.size() << std::endl;
+ std::cout << " bias: " << df.b << std::endl;
+ return df;
+ }
+ else
+ {
+ return my_trainer.get_decision_function();
+ }
+ }
+
+ template <
+ typename in_sample_vector_type,
+ typename in_scalar_vector_type
+ >
+ const decision_function<kernel_type> do_train_cached (
+ const in_sample_vector_type& x,
+ const in_scalar_vector_type& y
+ ) const
+ {
+
+ dlib::rand rnd;
+
+ // make a caching kernel
+ typedef caching_kernel<kernel_type, in_sample_vector_type> ckernel_type;
+ ckernel_type ck(trainer.get_kernel(), x, cache_size);
+
+ // now rebind the trainer to use the caching kernel
+ typedef typename trainer_type::template rebind<ckernel_type>::other rebound_trainer_type;
+ rebound_trainer_type my_trainer;
+ my_trainer.set_kernel(ck);
+ replicate_settings(trainer, my_trainer);
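+            // copy lambda, tolerance, and max_num_sv from the original trainer onto
+            // the rebound one (its kernel was already set just above)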
+
+ scalar_type cur_learning_rate = min_learning_rate + 10;
+ unsigned long count = 0;
+
+ while (cur_learning_rate > min_learning_rate)
+ {
+ const long i = rnd.get_random_32bit_number()%x.size();
+ // keep feeding the trainer data until its learning rate goes below our threshold
+ cur_learning_rate = my_trainer.train(i, y(i));
+
+ if (verbose)
+ {
+ if ( (count&0x7FF) == 0)
+ {
+ std::cout << "\rbatch_trainer(): Percent complete: "
+ << 100*min_learning_rate/cur_learning_rate << " " << std::flush;
+ }
+ ++count;
+ }
+ }
+
+ if (verbose)
+ {
+ decision_function<ckernel_type> cached_df;
+ cached_df = my_trainer.get_decision_function();
+
+ std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl;
+ std::cout << " Num sv: " << cached_df.basis_vectors.size() << std::endl;
+ std::cout << " bias: " << cached_df.b << std::endl;
+
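+                // cached_df's basis "vectors" are sample indices, so map them back
+                // to the actual samples with rowm()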
+ return decision_function<kernel_type> (
+ cached_df.alpha,
+ cached_df.b,
+ trainer.get_kernel(),
+ rowm(x, cached_df.basis_vectors)
+ );
+ }
+ else
+ {
+ decision_function<ckernel_type> cached_df;
+ cached_df = my_trainer.get_decision_function();
+
+ return decision_function<kernel_type> (
+ cached_df.alpha,
+ cached_df.b,
+ trainer.get_kernel(),
+ rowm(x, cached_df.basis_vectors)
+ );
+ }
+ }
+
+ trainer_type trainer;
+ scalar_type min_learning_rate;
+ bool verbose;
+ bool use_cache;
+ long cache_size;
+
+ }; // end of class batch_trainer
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ const batch_trainer<trainer_type> batch (
+ const trainer_type& trainer,
+ const typename trainer_type::scalar_type min_learning_rate = 0.1
+ ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ const batch_trainer<trainer_type> verbose_batch (
+ const trainer_type& trainer,
+ const typename trainer_type::scalar_type min_learning_rate = 0.1
+ ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ const batch_trainer<trainer_type> batch_cached (
+ const trainer_type& trainer,
+ const typename trainer_type::scalar_type min_learning_rate = 0.1,
+ long cache_size = 100
+ ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); }
+
+// ----------------------------------------------------------------------------------------
+
+ template <
+ typename trainer_type
+ >
+ const batch_trainer<trainer_type> verbose_batch_cached (
+ const trainer_type& trainer,
+ const typename trainer_type::scalar_type min_learning_rate = 0.1,
+ long cache_size = 100
+ ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_PEGASoS_
+