diff options
Diffstat (limited to '')
-rw-r--r-- | ml/dlib/dlib/svm/pegasos.h | 710 |
1 files changed, 710 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/pegasos.h b/ml/dlib/dlib/svm/pegasos.h new file mode 100644 index 000000000..c28093fe0 --- /dev/null +++ b/ml/dlib/dlib/svm/pegasos.h @@ -0,0 +1,710 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_PEGASoS_ +#define DLIB_PEGASoS_ + +#include "pegasos_abstract.h" +#include <cmath> +#include "../algs.h" +#include "function.h" +#include "kernel.h" +#include "kcentroid.h" +#include <iostream> +#include <memory> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_pegasos + { + typedef kcentroid<offset_kernel<K> > kc_type; + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + template <typename K_> + struct rebind { + typedef svm_pegasos<K_> other; + }; + + svm_pegasos ( + ) : + max_sv(40), + lambda_c1(0.0001), + lambda_c2(0.0001), + tau(0.01), + tolerance(0.01), + train_count(0), + w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false) + { + } + + svm_pegasos ( + const kernel_type& kernel_, + const scalar_type& lambda_, + const scalar_type& tolerance_, + unsigned long max_num_sv + ) : + max_sv(max_num_sv), + kernel(kernel_), + lambda_c1(lambda_), + lambda_c2(lambda_), + tau(0.01), + tolerance(tolerance_), + train_count(0), + w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false) + { + // make sure requires clause is not broken + DLIB_ASSERT(lambda_ > 0 && tolerance > 0 && max_num_sv > 0, + "\tsvm_pegasos::svm_pegasos(kernel,lambda,tolerance)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + << "\n\t max_num_sv: " << max_num_sv + ); + } + + void clear ( + ) + { + // reset the w vector back to its initial state + w = kc_type(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false); + train_count = 0; + } + + void set_kernel ( + kernel_type k + ) + { + kernel = k; + clear(); + } + + void set_max_num_sv ( + unsigned long max_num_sv + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(max_num_sv > 0, + "\tvoid svm_pegasos::set_max_num_sv(max_num_sv)" + << "\n\t invalid inputs were given to this function" + << "\n\t max_num_sv: " << max_num_sv + ); + max_sv = max_num_sv; + clear(); + } + + unsigned long get_max_num_sv ( + ) const + { + return max_sv; + } + + void set_tolerance ( + double tol + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < tol, + "\tvoid svm_pegasos::set_tolerance(tol)" + << "\n\t invalid inputs were given to this function" + << "\n\t tol: " << tol + ); + tolerance = tol; + clear(); + } + + void set_lambda ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < lambda_, + "\tvoid svm_pegasos::set_lambda(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda_c1 = lambda_; + lambda_c2 = lambda_; + + max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); + clear(); + } + + void set_lambda_class1 ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < lambda_, + "\tvoid svm_pegasos::set_lambda_class1(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda_c1 = lambda_; + max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); + clear(); + } + + void set_lambda_class2 ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < lambda_, + "\tvoid svm_pegasos::set_lambda_class2(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda_c2 = lambda_; + max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); + clear(); + } + + const scalar_type get_lambda_class1 ( + ) const + { + return lambda_c1; + } + + const scalar_type get_lambda_class2 ( + ) const + { + return lambda_c2; + } + + const scalar_type get_tolerance ( + ) const + { + return tolerance; + } + + const kernel_type get_kernel ( + ) const + { + return kernel; + } + + unsigned long get_train_count ( + ) const + { + return static_cast<unsigned long>(train_count); + } + + scalar_type train ( + const sample_type& x, + const scalar_type& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(y == -1 || y == 1, + "\tscalar_type svm_pegasos::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t y: " << y + ); + + const double lambda = (y==+1)? lambda_c1 : lambda_c2; + + ++train_count; + const scalar_type learning_rate = 1/(lambda*train_count); + + // if this sample point is within the margin of the current hyperplane + if (y*w.inner_product(x) < 1) + { + + // compute: w = (1-learning_rate*lambda)*w + y*learning_rate*x + w.train(x, 1 - learning_rate*lambda, y*learning_rate); + + scalar_type wnorm = std::sqrt(w.squared_norm()); + scalar_type temp = max_wnorm/wnorm; + if (temp < 1) + w.scale_by(temp); + } + else + { + w.scale_by(1 - learning_rate*lambda); + } + + // return the current learning rate + return 1/(std::min(lambda_c1,lambda_c2)*train_count); + } + + scalar_type operator() ( + const sample_type& x + ) const + { + return w.inner_product(x); + } + + const decision_function<kernel_type> get_decision_function ( + ) const + { + distance_function<offset_kernel<kernel_type> > df = w.get_distance_function(); + return decision_function<kernel_type>(df.get_alpha(), -tau*sum(df.get_alpha()), kernel, df.get_basis_vectors()); + } + + void swap ( + svm_pegasos& item + ) + { + exchange(max_sv, item.max_sv); + exchange(kernel, item.kernel); + exchange(lambda_c1, item.lambda_c1); + exchange(lambda_c2, item.lambda_c2); + exchange(max_wnorm, item.max_wnorm); + exchange(tau, item.tau); + exchange(tolerance, item.tolerance); + exchange(train_count, item.train_count); + exchange(w, item.w); + } + + friend void serialize(const svm_pegasos& item, std::ostream& out) + { + serialize(item.max_sv, out); + serialize(item.kernel, out); + serialize(item.lambda_c1, out); + serialize(item.lambda_c2, out); + serialize(item.max_wnorm, out); + serialize(item.tau, out); + serialize(item.tolerance, out); + serialize(item.train_count, out); + serialize(item.w, out); + } + + friend void deserialize(svm_pegasos& item, std::istream& in) + { + deserialize(item.max_sv, in); + deserialize(item.kernel, in); + deserialize(item.lambda_c1, in); + deserialize(item.lambda_c2, in); + deserialize(item.max_wnorm, in); + deserialize(item.tau, in); + deserialize(item.tolerance, in); + deserialize(item.train_count, in); + deserialize(item.w, in); + } + + private: + + unsigned long max_sv; + kernel_type kernel; + scalar_type lambda_c1; + scalar_type lambda_c2; + scalar_type max_wnorm; + scalar_type tau; + scalar_type tolerance; + scalar_type train_count; + kc_type w; + + }; // end of class svm_pegasos + + template < + typename K + > + void swap ( + svm_pegasos<K>& a, + svm_pegasos<K>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + void replicate_settings ( + const svm_pegasos<T>& source, + svm_pegasos<U>& dest + ) + { + dest.set_tolerance(source.get_tolerance()); + dest.set_lambda_class1(source.get_lambda_class1()); + dest.set_lambda_class2(source.get_lambda_class2()); + dest.set_max_num_sv(source.get_max_num_sv()); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class batch_trainer + { + + // ------------------------------------------------------------------------------------ + + template < + typename K, + typename sample_vector_type + > + class caching_kernel + { + public: + typedef typename K::scalar_type scalar_type; + typedef long sample_type; + //typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + caching_kernel () {} + + caching_kernel ( + const K& kern, + const sample_vector_type& samps, + long cache_size_ + ) : real_kernel(kern), samples(&samps), counter(0) + { + cache_size = std::min<long>(cache_size_, samps.size()); + + cache.reset(new cache_type); + cache->frequency_of_use.resize(samps.size()); + for (long i = 0; i < samps.size(); ++i) + cache->frequency_of_use[i] = std::make_pair(0, i); + + // Set the cache build/rebuild threshold so that we have to have + // as many cache misses as there are entries in the cache before + // we build/rebuild. + counter_threshold = samps.size()*cache_size; + cache->sample_location.assign(samples->size(), -1); + } + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + // rebuild the cache every so often + if (counter > counter_threshold ) + { + build_cache(); + } + + const long a_loc = cache->sample_location[a]; + const long b_loc = cache->sample_location[b]; + + cache->frequency_of_use[a].first += 1; + cache->frequency_of_use[b].first += 1; + + if (a_loc != -1) + { + return cache->kernel(a_loc, b); + } + else if (b_loc != -1) + { + return cache->kernel(b_loc, a); + } + else + { + ++counter; + return real_kernel((*samples)(a), (*samples)(b)); + } + } + + bool operator== ( + const caching_kernel& item + ) const + { + return item.real_kernel == real_kernel && + item.samples == samples; + } + + private: + K real_kernel; + + void build_cache ( + ) const + { + std::sort(cache->frequency_of_use.rbegin(), cache->frequency_of_use.rend()); + counter = 0; + + + cache->kernel.set_size(cache_size, samples->size()); + cache->sample_location.assign(samples->size(), -1); + + // loop over all the samples in the cache + for (long i = 0; i < cache_size; ++i) + { + const long cur = cache->frequency_of_use[i].second; + cache->sample_location[cur] = i; + + // now populate all possible kernel products with the current sample + for (long j = 0; j < samples->size(); ++j) + { + cache->kernel(i, j) = real_kernel((*samples)(cur), (*samples)(j)); + } + + } + + // reset the frequency of use metrics + for (long i = 0; i < samples->size(); ++i) + cache->frequency_of_use[i] = std::make_pair(0, i); + } + + + struct cache_type + { + matrix<scalar_type> kernel; + + std::vector<long> sample_location; // where in the cache a sample is. -1 means not in cache + std::vector<std::pair<long,long> > frequency_of_use; + }; + + const sample_vector_type* samples = 0; + + std::shared_ptr<cache_type> cache; + mutable unsigned long counter = 0; + unsigned long counter_threshold = 0; + long cache_size = 0; + }; + + // ------------------------------------------------------------------------------------ + + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + + batch_trainer ( + ) : + min_learning_rate(0.1), + use_cache(false), + cache_size(100) + { + } + + batch_trainer ( + const trainer_type& trainer_, + const scalar_type min_learning_rate_, + bool verbose_, + bool use_cache_, + long cache_size_ = 100 + ) : + trainer(trainer_), + min_learning_rate(min_learning_rate_), + verbose(verbose_), + use_cache(use_cache_), + cache_size(cache_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < min_learning_rate_ && + cache_size_ > 0, + "\tbatch_trainer::batch_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t min_learning_rate_: " << min_learning_rate_ + << "\n\t cache_size_: " << cache_size_ + ); + + trainer.clear(); + } + + const scalar_type get_min_learning_rate ( + ) const + { + return min_learning_rate; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + if (use_cache) + return do_train_cached(mat(x), mat(y)); + else + return do_train(mat(x), mat(y)); + } + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + + dlib::rand rnd; + + trainer_type my_trainer(trainer); + + scalar_type cur_learning_rate = min_learning_rate + 10; + unsigned long count = 0; + + while (cur_learning_rate > min_learning_rate) + { + const long i = rnd.get_random_32bit_number()%x.size(); + // keep feeding the trainer data until its learning rate goes below our threshold + cur_learning_rate = my_trainer.train(x(i), y(i)); + + if (verbose) + { + if ( (count&0x7FF) == 0) + { + std::cout << "\rbatch_trainer(): Percent complete: " + << 100*min_learning_rate/cur_learning_rate << " " << std::flush; + } + ++count; + } + } + + if (verbose) + { + decision_function<kernel_type> df = my_trainer.get_decision_function(); + std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl; + std::cout << " Num sv: " << df.basis_vectors.size() << std::endl; + std::cout << " bias: " << df.b << std::endl; + return df; + } + else + { + return my_trainer.get_decision_function(); + } + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train_cached ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + + dlib::rand rnd; + + // make a caching kernel + typedef caching_kernel<kernel_type, in_sample_vector_type> ckernel_type; + ckernel_type ck(trainer.get_kernel(), x, cache_size); + + // now rebind the trainer to use the caching kernel + typedef typename trainer_type::template rebind<ckernel_type>::other rebound_trainer_type; + rebound_trainer_type my_trainer; + my_trainer.set_kernel(ck); + replicate_settings(trainer, my_trainer); + + scalar_type cur_learning_rate = min_learning_rate + 10; + unsigned long count = 0; + + while (cur_learning_rate > min_learning_rate) + { + const long i = rnd.get_random_32bit_number()%x.size(); + // keep feeding the trainer data until its learning rate goes below our threshold + cur_learning_rate = my_trainer.train(i, y(i)); + + if (verbose) + { + if ( (count&0x7FF) == 0) + { + std::cout << "\rbatch_trainer(): Percent complete: " + << 100*min_learning_rate/cur_learning_rate << " " << std::flush; + } + ++count; + } + } + + if (verbose) + { + decision_function<ckernel_type> cached_df; + cached_df = my_trainer.get_decision_function(); + + std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl; + std::cout << " Num sv: " << cached_df.basis_vectors.size() << std::endl; + std::cout << " bias: " << cached_df.b << std::endl; + + return decision_function<kernel_type> ( + cached_df.alpha, + cached_df.b, + trainer.get_kernel(), + rowm(x, cached_df.basis_vectors) + ); + } + else + { + decision_function<ckernel_type> cached_df; + cached_df = my_trainer.get_decision_function(); + + return decision_function<kernel_type> ( + cached_df.alpha, + cached_df.b, + trainer.get_kernel(), + rowm(x, cached_df.basis_vectors) + ); + } + } + + trainer_type trainer; + scalar_type min_learning_rate; + bool verbose; + bool use_cache; + long cache_size; + + }; // end of class batch_trainer + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> batch ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> verbose_batch ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> batch_cached ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1, + long cache_size = 100 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> verbose_batch_cached ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1, + long cache_size = 100 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_PEGASoS_ + |