diff options
Diffstat (limited to 'ml/dlib/dlib/svm')
135 files changed, 45082 insertions, 0 deletions
diff --git a/ml/dlib/dlib/svm/active_learning.h b/ml/dlib/dlib/svm/active_learning.h new file mode 100644 index 000000000..581540e67 --- /dev/null +++ b/ml/dlib/dlib/svm/active_learning.h @@ -0,0 +1,162 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_ACTIVE_LEARnING_Hh_ +#define DLIB_ACTIVE_LEARnING_Hh_ + +#include "active_learning_abstract.h" + +#include "svm_c_linear_dcd_trainer.h" +#include <vector> + +namespace dlib +{ + + enum active_learning_mode + { + max_min_margin, + ratio_margin + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename in_sample_vector_type, + typename in_scalar_vector_type, + typename in_sample_vector_type2 + > + std::vector<unsigned long> impl_rank_unlabeled_training_samples ( + const svm_c_linear_dcd_trainer<kernel_type>& trainer, + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels, + const in_sample_vector_type2& unlabeled_samples, + const active_learning_mode mode + ) + { + DLIB_ASSERT(is_vector(unlabeled_samples) && + (samples.size() == 0 || is_learning_problem(samples, labels)) , + "\t std::vector<unsigned long> rank_unlabeled_training_samples()" + << "\n\t Invalid inputs were given to this function" + << "\n\t is_vector(unlabeled_samples): " << is_vector(unlabeled_samples) + << "\n\t is_learning_problem(samples, labels): " << is_learning_problem(samples, labels) + << "\n\t samples.size(): " << samples.size() + << "\n\t labels.size(): " << labels.size() + ); + + // If there aren't any training samples then all unlabeled_samples are equally good. + // So just report an arbitrary ordering. 
+ if (samples.size() == 0 || unlabeled_samples.size() == 0) + { + std::vector<unsigned long> ret(unlabeled_samples.size()); + for (unsigned long i = 0; i < ret.size(); ++i) + ret[i] = i; + + return ret; + } + + // We are going to score each unlabeled sample and put the score and index into + // results. Then at the end of this function we just sort it and return the indices. + std::vector<std::pair<double, unsigned long> > results; + results.resize(unlabeled_samples.size()); + + // make sure we use this trainer's ability to warm start itself since that will make + // this whole function run a lot faster. But first, we need to find out what the state + // we will be warm starting from is. + typedef typename svm_c_linear_dcd_trainer<kernel_type>::optimizer_state optimizer_state; + optimizer_state state; + trainer.train(samples, labels, state); // call train() just to get state + + decision_function<kernel_type> df; + + std::vector<typename kernel_type::sample_type> temp_samples; + std::vector<typename kernel_type::scalar_type> temp_labels; + temp_samples.reserve(samples.size()+1); + temp_labels.reserve(labels.size()+1); + temp_samples.assign(samples.begin(), samples.end()); + temp_labels.assign(labels.begin(), labels.end()); + temp_samples.resize(temp_samples.size()+1); + temp_labels.resize(temp_labels.size()+1); + + + for (long i = 0; i < unlabeled_samples.size(); ++i) + { + temp_samples.back() = unlabeled_samples(i); + // figure out the margin for each possible labeling of this sample. + + optimizer_state temp(state); + temp_labels.back() = +1; + df = trainer.train(temp_samples, temp_labels, temp); + const double margin_p = temp_labels.back()*df(temp_samples.back()); + + temp = state; + temp_labels.back() = -1; + df = trainer.train(temp_samples, temp_labels, temp); + const double margin_n = temp_labels.back()*df(temp_samples.back()); + + if (mode == max_min_margin) + { + // The score for this sample is its min possible margin over possible labels. 
+ // Therefore, this score measures how much flexibility we have to label this + // sample however we want. The intuition being that the most useful points to + // label are the ones that are still free to obtain either label. + results[i] = std::make_pair(std::min(margin_p, margin_n), i); + } + else + { + // In this case, the score for the sample is a ratio that tells how close the + // two margin values are to each other. The closer they are the better. So in + // this case we are saying we are looking for samples that have the same + // preference for either class label. + if (std::abs(margin_p) >= std::abs(margin_n)) + { + if (margin_p != 0) + results[i] = std::make_pair(margin_n/margin_p, i); + else // if both are == 0 then say 0/0 == 1 + results[i] = std::make_pair(1, i); + } + else + { + results[i] = std::make_pair(margin_p/margin_n, i); + } + } + } + + // sort the results so the highest scoring samples come first. + std::sort(results.rbegin(), results.rend()); + + // transfer results into a vector with just sample indices so we can return it. 
+ std::vector<unsigned long> ret(results.size()); + for (unsigned long i = 0; i < ret.size(); ++i) + ret[i] = results[i].second; + return ret; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename in_sample_vector_type, + typename in_scalar_vector_type, + typename in_sample_vector_type2 + > + std::vector<unsigned long> rank_unlabeled_training_samples ( + const svm_c_linear_dcd_trainer<kernel_type>& trainer, + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels, + const in_sample_vector_type2& unlabeled_samples, + const active_learning_mode mode = max_min_margin + ) + { + return impl_rank_unlabeled_training_samples(trainer, + mat(samples), + mat(labels), + mat(unlabeled_samples), + mode); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ACTIVE_LEARnING_Hh_ + diff --git a/ml/dlib/dlib/svm/active_learning_abstract.h b/ml/dlib/dlib/svm/active_learning_abstract.h new file mode 100644 index 000000000..76a5120e3 --- /dev/null +++ b/ml/dlib/dlib/svm/active_learning_abstract.h @@ -0,0 +1,75 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_ +#ifdef DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_ + +#include "svm_c_linear_dcd_trainer_abstract.h" +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + enum active_learning_mode + { + max_min_margin, + ratio_margin + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename in_sample_vector_type, + typename in_scalar_vector_type, + typename in_sample_vector_type2 + > + std::vector<unsigned long> rank_unlabeled_training_samples ( + const svm_c_linear_dcd_trainer<kernel_type>& trainer, + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels, + const in_sample_vector_type2& unlabeled_samples, + const active_learning_mode mode = max_min_margin + ); + /*! + requires + - if (samples.size() != 0) then + - it must be legal to call trainer.train(samples, labels) + - is_learning_problem(samples, labels) == true + - unlabeled_samples must contain the same kind of vectors as samples. + - unlabeled_samples, samples, and labels must be matrices or types of + objects convertible to a matrix via mat(). + - is_vector(unlabeled_samples) == true + ensures + - Suppose that we wish to learn a binary classifier by calling + trainer.train(samples, labels) but we are also interested in selecting one of + the elements of unlabeled_samples to add to our training data. Since doing + this requires us to find out the label of the sample, a potentially tedious + or expensive process, we would like to select the "best" element from + unlabeled_samples for labeling. The rank_unlabeled_training_samples() + attempts to find this "best" element. In particular, this function returns a + ranked list of all the elements in unlabeled_samples such that the + "best" elements come first. 
+ - The method used by this function is described in the paper: + Support Vector Machine Active Learning with Applications to Text Classification + by Simon Tong and Daphne Koller + In particular, this function implements the MaxMin Margin and Ratio Margin + selection strategies described in the paper. Moreover, the mode argument + to this function selects which of these strategies is used. + - returns a std::vector V such that: + - V contains a list of all the indices from unlabeled_samples. Moreover, + they are ordered so that the most useful samples come first. + - V.size() == unlabeled_samples.size() + - unlabeled_samples[V[0]] == The best sample to add into the training set. + - unlabeled_samples[V[1]] == The second best sample to add into the training set. + - unlabeled_samples[V[i]] == The i-th best sample to add into the training set. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ACTIVE_LEARnING_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/assignment_function.h b/ml/dlib/dlib/svm/assignment_function.h new file mode 100644 index 000000000..fdacb2c17 --- /dev/null +++ b/ml/dlib/dlib/svm/assignment_function.h @@ -0,0 +1,255 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_ASSIGNMENT_FuNCTION_Hh_ +#define DLIB_ASSIGNMENT_FuNCTION_Hh_ + +#include "assignment_function_abstract.h" +#include "../matrix.h" +#include <vector> +#include "../optimization/max_cost_assignment.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class assignment_function + { + public: + + typedef typename feature_extractor::lhs_element lhs_element; + typedef typename feature_extractor::rhs_element rhs_element; + + + typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; + + typedef std::vector<long> label_type; + typedef label_type result_type; + + assignment_function() + { + weights.set_size(fe.num_features()); + weights = 0; + bias = 0; + force_assignment = false; + } + + explicit assignment_function( + const matrix<double,0,1>& weights_, + double bias_ + ) : + weights(weights_), + bias(bias_), + force_assignment(false) + { + // make sure requires clause is not broken + DLIB_ASSERT(fe.num_features() == static_cast<unsigned long>(weights_.size()), + "\t assignment_function::assignment_function(weights_)" + << "\n\t These sizes should match" + << "\n\t fe.num_features(): " << fe.num_features() + << "\n\t weights_.size(): " << weights_.size() + << "\n\t this: " << this + ); + + } + + assignment_function( + const matrix<double,0,1>& weights_, + double bias_, + const feature_extractor& fe_ + ) : + fe(fe_), + weights(weights_), + bias(bias_), + force_assignment(false) + { + // make sure requires clause is not broken + DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()), + "\t assignment_function::assignment_function(weights_,fe_)" + << "\n\t These sizes should match" + << "\n\t fe_.num_features(): " << fe_.num_features() + << "\n\t weights_.size(): " << weights_.size() + << "\n\t this: " << this + ); + } + + assignment_function( + const matrix<double,0,1>& weights_, + double bias_, + 
const feature_extractor& fe_, + bool force_assignment_ + ) : + fe(fe_), + weights(weights_), + bias(bias_), + force_assignment(force_assignment_) + { + // make sure requires clause is not broken + DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()), + "\t assignment_function::assignment_function(weights_,fe_,force_assignment_)" + << "\n\t These sizes should match" + << "\n\t fe_.num_features(): " << fe_.num_features() + << "\n\t weights_.size(): " << weights_.size() + << "\n\t this: " << this + ); + } + + const feature_extractor& get_feature_extractor ( + ) const { return fe; } + + const matrix<double,0,1>& get_weights ( + ) const { return weights; } + + double get_bias ( + ) const { return bias; } + + bool forces_assignment ( + ) const { return force_assignment; } + + void predict_assignments ( + const std::vector<lhs_element>& lhs, + const std::vector<rhs_element>& rhs, + result_type& assignment + ) const + { + assignment.clear(); + + matrix<double> cost; + unsigned long size; + if (force_assignment) + { + size = std::max(lhs.size(), rhs.size()); + } + else + { + size = rhs.size() + lhs.size(); + } + cost.set_size(size, size); + + typedef typename feature_extractor::feature_vector_type feature_vector_type; + feature_vector_type feats; + + // now fill out the cost assignment matrix + for (long r = 0; r < cost.nr(); ++r) + { + for (long c = 0; c < cost.nc(); ++c) + { + if (r < (long)lhs.size() && c < (long)rhs.size()) + { + fe.get_features(lhs[r], rhs[c], feats); + cost(r,c) = dot(weights, feats) + bias; + } + else + { + cost(r,c) = 0; + } + } + } + + + if (cost.size() != 0) + { + // max_cost_assignment() only works with integer matrices, so convert from + // double to integer. 
+ const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost)); + matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale)); + assignment = max_cost_assignment(int_cost); + assignment.resize(lhs.size()); + } + + // adjust assignment so that non-assignments have a value of -1 + for (unsigned long i = 0; i < assignment.size(); ++i) + { + if (assignment[i] >= (long)rhs.size()) + assignment[i] = -1; + } + } + + void predict_assignments ( + const sample_type& item, + result_type& assignment + ) const + { + predict_assignments(item.first, item.second, assignment); + } + + result_type operator()( + const std::vector<lhs_element>& lhs, + const std::vector<rhs_element>& rhs + ) const + { + result_type temp; + predict_assignments(lhs,rhs,temp); + return temp; + } + + result_type operator() ( + const sample_type& item + ) const + { + return (*this)(item.first, item.second); + } + + private: + + + feature_extractor fe; + matrix<double,0,1> weights; + double bias; + bool force_assignment; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void serialize ( + const assignment_function<feature_extractor>& item, + std::ostream& out + ) + { + int version = 2; + serialize(version, out); + serialize(item.get_feature_extractor(), out); + serialize(item.get_weights(), out); + serialize(item.get_bias(), out); + serialize(item.forces_assignment(), out); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void deserialize ( + assignment_function<feature_extractor>& item, + std::istream& in + ) + { + feature_extractor fe; + matrix<double,0,1> weights; + double bias; + bool force_assignment; + int version = 0; + deserialize(version, in); + if (version != 2) + throw serialization_error("Unexpected version found while deserializing dlib::assignment_function."); + 
+ deserialize(fe, in); + deserialize(weights, in); + deserialize(bias, in); + deserialize(force_assignment, in); + + item = assignment_function<feature_extractor>(weights, bias, fe, force_assignment); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ASSIGNMENT_FuNCTION_Hh_ + diff --git a/ml/dlib/dlib/svm/assignment_function_abstract.h b/ml/dlib/dlib/svm/assignment_function_abstract.h new file mode 100644 index 000000000..927731856 --- /dev/null +++ b/ml/dlib/dlib/svm/assignment_function_abstract.h @@ -0,0 +1,342 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_ASSIGNMENT_FuNCTION_ABSTRACT_Hh_ +#ifdef DLIB_ASSIGNMENT_FuNCTION_ABSTRACT_Hh_ + +#include <vector> +#include "../optimization/max_cost_assignment_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class example_feature_extractor + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the interface a feature extractor must implement + if it is to be used with the assignment_function defined at the bottom + of this file. + + The model used by assignment_function objects is the following. + Given two sets of objects, the Left Hand Set (LHS) and Right Hand Set (RHS), + find a one-to-one mapping M from LHS into RHS such that: + M == argmax_m sum_{l in LHS} match_score(l,m(l)) + Where match_score() returns a scalar value indicating how good it is + to say l maps to the RHS element m(l). Additionally, in this model, + m() is allowed to indicate that l doesn't map to anything, and in this + case it is excluded from the sum. + + Finally, match_score() is defined as: + match_score(l,r) == dot(w, PSI(l,r)) + bias + where l is an element of LHS, r is an element of RHS, w is a parameter + vector and bias is a scalar valued parameter. 
+ + Therefore, a feature extractor defines how the PSI() feature vector + is calculated. In particular, PSI() is defined by the get_features() + method of this class. + + THREAD SAFETY + Instances of this object are required to be threadsafe, that is, it should + be safe for multiple threads to make concurrent calls to the member + functions of this object. + + !*/ + + public: + + // This type should be a dlib::matrix capable of storing column vectors + // or an unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. + typedef matrix_or_sparse_vector_type feature_vector_type; + + // These two typedefs define the types used to represent an element in + // the left hand and right hand sets. You can use any copyable types here. + typedef user_defined_type_1 lhs_element; + typedef user_defined_type_2 rhs_element; + + unsigned long num_features( + ) const; + /*! + ensures + - returns the dimensionality of the PSI() feature vector. + !*/ + + void get_features ( + const lhs_element& left, + const rhs_element& right, + feature_vector_type& feats + ) const; + /*! + ensures + - #feats == PSI(left,right) + (i.e. This function computes a feature vector which, in some sense, + captures information useful for deciding if matching left to right + is "good"). + !*/ + + unsigned long num_nonnegative_weights ( + ) const; + /*! + ensures + - returns the number of elements of the w parameter vector which should be + non-negative. That is, this feature extractor is intended to be used + with w vectors where the first num_nonnegative_weights() elements of w + are >= 0. That is, it should be the case that w(i) >= 0 for all i < + num_nonnegative_weights(). + - Note that num_nonnegative_weights() is just an optional method to allow + you to tell a tool like the structural_assignment_trainer that the + learned w should have a certain number of non-negative elements. 
+ Therefore, if you do not provide a num_nonnegative_weights() method in + your feature extractor then it will default to a value of 0, indicating + that all elements of the w parameter vector may be any value. + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + void serialize( + const example_feature_extractor& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + void deserialize( + example_feature_extractor& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class assignment_function + { + /*! + REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor discussed above. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for solving the optimal assignment problem given a + user defined method for computing the quality of any particular assignment. + + To define this precisely, suppose you have two sets of objects, a + Left Hand Set (LHS) and a Right Hand Set (RHS) and you want to + find a one-to-one mapping M from LHS into RHS such that: + M == argmax_m sum_{l in LHS} match_score(l,m(l)) + Where match_score() returns a scalar value indicating how good it is + to say l maps to the RHS element m(l). Additionally, in this model, + m() is allowed to indicate that l doesn't map to anything, and in this + case it is excluded from the sum. + + Finally, this object supports match_score() functions of the form: + match_score(l,r) == dot(w, PSI(l,r)) + bias + where l is an element of LHS, r is an element of RHS, w is a parameter + vector, bias is a scalar valued parameter, and PSI() is defined by the + feature_extractor template argument. 
+ + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const members of this object from multiple + threads so long as the feature_extractor is also threadsafe. This is + because the const members are purely read-only operations. However, + any operation that modifies an assignment_function is not threadsafe. + !*/ + + public: + + typedef typename feature_extractor::lhs_element lhs_element; + typedef typename feature_extractor::rhs_element rhs_element; + typedef std::vector<long> label_type; + typedef label_type result_type; + typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; + + assignment_function( + ); + /*! + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights().size() == #get_feature_extractor().num_features() + - #get_weights() == 0 + - #get_bias() == 0 + - #forces_assignment() == false + !*/ + + explicit assignment_function( + const matrix<double,0,1>& weights, + double bias + ); + /*! + requires + - feature_extractor().num_features() == weights.size() + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights() == weights + - #get_bias() == bias + - #forces_assignment() == false + !*/ + + assignment_function( + const matrix<double,0,1>& weights, + double bias, + const feature_extractor& fe + ); + /*! + requires + - fe.num_features() == weights.size() + ensures + - #get_feature_extractor() == fe + - #get_weights() == weights + - #get_bias() == bias + - #forces_assignment() == false + !*/ + + assignment_function( + const matrix<double,0,1>& weights, + double bias, + const feature_extractor& fe, + bool force_assignment + ); + /*! 
+ requires + - fe.num_features() == weights.size() + ensures + - #get_feature_extractor() == fe + - #get_weights() == weights + - #get_bias() == bias + - #forces_assignment() == force_assignment + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object + !*/ + + const matrix<double,0,1>& get_weights ( + ) const; + /*! + ensures + - returns the parameter vector (w) associated with this assignment function. + The length of the vector is get_feature_extractor().num_features(). + !*/ + + double get_bias ( + ) const; + /*! + ensures + - returns the bias parameter associated with this assignment function. + !*/ + + bool forces_assignment ( + ) const; + /*! + ensures + - returns true if this object is in the "forced assignment mode" and false + otherwise. + - When deciding how to match LHS to RHS, this object can operate in one of + two modes. In the default mode, this object will indicate that there is + no match for an element of LHS if the best matching element of RHS would + result in a negative match_score(). However, in the "forced assignment mode", + this object will always make the assignment if there is an available + element in RHS, regardless of the match_score(). + + Another way to understand this distinction is to consider an example. + Suppose LHS and RHS both contain 10 elements. Then in the default mode, + it is possible for this object to indicate that there are anywhere between + 0 to 10 matches between LHS and RHS. However, in forced assignment mode + it will always indicate exactly 10 matches. + !*/ + + result_type operator()( + const std::vector<lhs_element>& lhs, + const std::vector<rhs_element>& rhs + ) const + /*! + ensures + - returns a vector ASSIGN such that: + - ASSIGN.size() == lhs.size() + - if (ASSIGN[i] != -1) then + - lhs[i] is predicted to associate to rhs[ASSIGN[i]]. + - else + - lhs[i] doesn't associate with anything in rhs. 
+ - All values in ASSIGN which are not equal to -1 are unique. + That is, ASSIGN will never indicate that more than one element + of lhs is assigned to a particular element of rhs. + !*/ + + result_type operator() ( + const sample_type& item + ) const; + /*! + ensures + - returns (*this)(item.first, item.second); + !*/ + + void predict_assignments ( + const sample_type& item, + result_type& assignment + ) const; + /*! + ensures + - #assignment == (*this)(item) + !*/ + + void predict_assignments ( + const std::vector<lhs_element>& lhs, + const std::vector<rhs_element>& rhs, + result_type& assignment + ) const; + /*! + ensures + - #assignment == (*this)(lhs,rhs) + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void serialize ( + const assignment_function<feature_extractor>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void deserialize ( + assignment_function<feature_extractor>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ASSIGNMENT_FuNCTION_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/cross_validate_assignment_trainer.h b/ml/dlib/dlib/svm/cross_validate_assignment_trainer.h new file mode 100644 index 000000000..8166e1c82 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_assignment_trainer.h @@ -0,0 +1,181 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_Hh_ +#define DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_Hh_ + +#include "cross_validate_assignment_trainer_abstract.h" +#include <vector> +#include "../matrix.h" +#include "svm.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename assignment_function + > + double test_assignment_function ( + const assignment_function& assigner, + const std::vector<typename assignment_function::sample_type>& samples, + const std::vector<typename assignment_function::label_type>& labels + ) + { + // make sure requires clause is not broken +#ifdef ENABLE_ASSERTS + if (assigner.forces_assignment()) + { + DLIB_ASSERT(is_forced_assignment_problem(samples, labels), + "\t double test_assignment_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels) + << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) + << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) + ); + } + else + { + DLIB_ASSERT(is_assignment_problem(samples, labels), + "\t double test_assignment_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) + << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) + ); + } +#endif + double total_right = 0; + double total = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + { + const std::vector<long>& out = assigner(samples[i]); + for (unsigned long j = 0; j < out.size(); ++j) + { + if (out[j] == labels[i][j]) + ++total_right; + + ++total; + } + } + + if (total != 0) + return total_right/total; + else + return 1; + } + +// ---------------------------------------------------------------------------------------- + + template 
< + typename trainer_type + > + double cross_validate_assignment_trainer ( + const trainer_type& trainer, + const std::vector<typename trainer_type::sample_type>& samples, + const std::vector<typename trainer_type::label_type>& labels, + const long folds + ) + { + // make sure requires clause is not broken +#ifdef ENABLE_ASSERTS + if (trainer.forces_assignment()) + { + DLIB_ASSERT(is_forced_assignment_problem(samples, labels) && + 1 < folds && folds <= static_cast<long>(samples.size()), + "\t double cross_validate_assignment_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t folds: " << folds + << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels) + << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) + << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) + ); + } + else + { + DLIB_ASSERT(is_assignment_problem(samples, labels) && + 1 < folds && folds <= static_cast<long>(samples.size()), + "\t double cross_validate_assignment_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t folds: " << folds + << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) + << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) + ); + } +#endif + + + + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::label_type label_type; + + const long num_in_test = samples.size()/folds; + const long num_in_train = samples.size() - num_in_test; + + + std::vector<sample_type> samples_test, samples_train; + std::vector<label_type> labels_test, labels_train; + + + long next_test_idx = 0; + double total_right = 0; + double total = 0; + + + for (long i = 0; i < folds; ++i) + { + samples_test.clear(); + labels_test.clear(); + 
samples_train.clear(); + labels_train.clear(); + + // load up the test samples + for (long cnt = 0; cnt < num_in_test; ++cnt) + { + samples_test.push_back(samples[next_test_idx]); + labels_test.push_back(labels[next_test_idx]); + next_test_idx = (next_test_idx + 1)%samples.size(); + } + + // load up the training samples + long next = next_test_idx; + for (long cnt = 0; cnt < num_in_train; ++cnt) + { + samples_train.push_back(samples[next]); + labels_train.push_back(labels[next]); + next = (next + 1)%samples.size(); + } + + + const typename trainer_type::trained_function_type& df = trainer.train(samples_train,labels_train); + + // check how good df is on the test data + for (unsigned long i = 0; i < samples_test.size(); ++i) + { + const std::vector<long>& out = df(samples_test[i]); + for (unsigned long j = 0; j < out.size(); ++j) + { + if (out[j] == labels_test[i][j]) + ++total_right; + + ++total; + } + } + + } // for (long i = 0; i < folds; ++i) + + if (total != 0) + return total_right/total; + else + return 1; + + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/cross_validate_assignment_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_assignment_trainer_abstract.h new file mode 100644 index 000000000..05dd4758e --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_assignment_trainer_abstract.h @@ -0,0 +1,69 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" +#include "svm.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename assignment_function + > + double test_assignment_function ( + const assignment_function& assigner, + const std::vector<typename assignment_function::sample_type>& samples, + const std::vector<typename assignment_function::label_type>& labels + ); + /*! + requires + - is_assignment_problem(samples, labels) + - if (assigner.forces_assignment()) then + - is_forced_assignment_problem(samples, labels) + - assignment_function == an instantiation of the dlib::assignment_function + template or an object with a compatible interface. + ensures + - Tests assigner against the given samples and labels and returns the fraction + of assignments predicted correctly. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + double cross_validate_assignment_trainer ( + const trainer_type& trainer, + const std::vector<typename trainer_type::sample_type>& samples, + const std::vector<typename trainer_type::label_type>& labels, + const long folds + ); + /*! + requires + - is_assignment_problem(samples, labels) + - if (trainer.forces_assignment()) then + - is_forced_assignment_problem(samples, labels) + - 1 < folds <= samples.size() + - trainer_type == dlib::structural_assignment_trainer or an object + with a compatible interface. + ensures + - performs k-fold cross validation by using the given trainer to solve the + given assignment learning problem for the given number of folds. Each fold + is tested using the output of the trainer and the fraction of assignments + predicted correctly is returned. + - The number of folds used is given by the folds argument. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_ASSiGNEMNT_TRAINER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer.h b/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer.h new file mode 100644 index 000000000..83e4e4048 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer.h @@ -0,0 +1,258 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_Hh_ +#define DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_Hh_ + +#include "../array.h" +#include "../graph_cuts/min_cut.h" +#include "svm.h" +#include "cross_validate_graph_labeling_trainer_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename graph_labeler, + typename graph_type + > + matrix<double,1,2> test_graph_labeling_function ( + const graph_labeler& labeler, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + const std::vector<std::vector<double> >& losses + ) + { +#ifdef ENABLE_ASSERTS + std::string reason_for_failure; + DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure) , + "\t matrix test_graph_labeling_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t reason_for_failure: " << reason_for_failure + ); + DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) && + all_values_are_nonnegative(losses) == true, + "\t matrix test_graph_labeling_function()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t labels.size(): " << labels.size() + << "\n\t losses.size(): " << losses.size() + << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses) + << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses) + ); +#endif + + std::vector<bool> temp; + double num_pos_correct = 0; + double num_pos = 0; + double num_neg_correct = 0; + double num_neg = 0; + + for (unsigned long i = 0; i < samples.size(); ++i) + { + labeler(samples[i], temp); + + for (unsigned long j = 0; j < labels[i].size(); ++j) + { + // What is the loss for this example? It's just 1 unless we have a + // per example loss vector. + const double loss = (losses.size() == 0) ? 1.0 : losses[i][j]; + + if (labels[i][j]) + { + num_pos += loss; + if (temp[j]) + num_pos_correct += loss; + } + else + { + num_neg += loss; + if (!temp[j]) + num_neg_correct += loss; + } + } + } + + matrix<double, 1, 2> res; + if (num_pos != 0) + res(0) = num_pos_correct/num_pos; + else + res(0) = 1; + if (num_neg != 0) + res(1) = num_neg_correct/num_neg; + else + res(1) = 1; + return res; + } + + template < + typename graph_labeler, + typename graph_type + > + matrix<double,1,2> test_graph_labeling_function ( + const graph_labeler& labeler, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels + ) + { + std::vector<std::vector<double> > losses; + return test_graph_labeling_function(labeler, samples, labels, losses); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename graph_type + > + matrix<double,1,2> cross_validate_graph_labeling_trainer ( + const trainer_type& trainer, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + const std::vector<std::vector<double> >& losses, + const long folds + ) + { +#ifdef ENABLE_ASSERTS + std::string reason_for_failure; + DLIB_ASSERT(is_graph_labeling_problem(samples, labels, 
reason_for_failure), + "\t matrix cross_validate_graph_labeling_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t reason_for_failure: " << reason_for_failure + ); + DLIB_ASSERT( 1 < folds && folds <= static_cast<long>(samples.size()), + "\t matrix cross_validate_graph_labeling_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t folds: " << folds + ); + DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) && + all_values_are_nonnegative(losses) == true, + "\t matrix cross_validate_graph_labeling_trainer()" + << "\n\t Invalid inputs were given to this function." + << "\n\t labels.size(): " << labels.size() + << "\n\t losses.size(): " << losses.size() + << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses) + << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses) + ); +#endif + + typedef std::vector<bool> label_type; + + const long num_in_test = samples.size()/folds; + const long num_in_train = samples.size() - num_in_test; + + + dlib::array<graph_type> samples_test, samples_train; + std::vector<label_type> labels_test, labels_train; + std::vector<std::vector<double> > losses_test, losses_train; + + + long next_test_idx = 0; + + std::vector<bool> temp; + double num_pos_correct = 0; + double num_pos = 0; + double num_neg_correct = 0; + double num_neg = 0; + + graph_type gtemp; + + for (long i = 0; i < folds; ++i) + { + samples_test.clear(); + labels_test.clear(); + losses_test.clear(); + samples_train.clear(); + labels_train.clear(); + losses_train.clear(); + + // load up the test samples + for (long cnt = 0; cnt < num_in_test; ++cnt) + { + copy_graph(samples[next_test_idx], gtemp); + samples_test.push_back(gtemp); + labels_test.push_back(labels[next_test_idx]); + if (losses.size() != 0) + losses_test.push_back(losses[next_test_idx]); + next_test_idx = (next_test_idx + 1)%samples.size(); + } + + // load up the 
training samples + long next = next_test_idx; + for (long cnt = 0; cnt < num_in_train; ++cnt) + { + copy_graph(samples[next], gtemp); + samples_train.push_back(gtemp); + labels_train.push_back(labels[next]); + if (losses.size() != 0) + losses_train.push_back(losses[next]); + next = (next + 1)%samples.size(); + } + + + const typename trainer_type::trained_function_type& labeler = trainer.train(samples_train,labels_train,losses_train); + + // check how good labeler is on the test data + for (unsigned long i = 0; i < samples_test.size(); ++i) + { + labeler(samples_test[i], temp); + for (unsigned long j = 0; j < labels_test[i].size(); ++j) + { + // What is the loss for this example? It's just 1 unless we have a + // per example loss vector. + const double loss = (losses_test.size() == 0) ? 1.0 : losses_test[i][j]; + + if (labels_test[i][j]) + { + num_pos += loss; + if (temp[j]) + num_pos_correct += loss; + } + else + { + num_neg += loss; + if (!temp[j]) + num_neg_correct += loss; + } + } + } + + } // for (long i = 0; i < folds; ++i) + + + matrix<double, 1, 2> res; + if (num_pos != 0) + res(0) = num_pos_correct/num_pos; + else + res(0) = 1; + if (num_neg != 0) + res(1) = num_neg_correct/num_neg; + else + res(1) = 1; + return res; + } + + template < + typename trainer_type, + typename graph_type + > + matrix<double,1,2> cross_validate_graph_labeling_trainer ( + const trainer_type& trainer, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + const long folds + ) + { + std::vector<std::vector<double> > losses; + return cross_validate_graph_labeling_trainer(trainer, samples, labels, losses, folds); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer_abstract.h new file mode 100644 index 
000000000..cda4af91e --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_graph_labeling_trainer_abstract.h @@ -0,0 +1,147 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_ABSTRACT_Hh_ + +#include "../array/array_kernel_abstract.h" +#include <vector> +#include "../matrix/matrix_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename graph_labeler, + typename graph_type + > + matrix<double,1,2> test_graph_labeling_function ( + const graph_labeler& labeler, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels + ); + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + - graph_labeler == an object with an interface compatible with the + dlib::graph_labeler object. + - the following must be a valid expression: labeler(samples[0]); + ensures + - This function tests the accuracy of the given graph labeler against + the sample graphs and their associated labels. In particular, this + function returns a matrix R such that: + - R(0) == The fraction of nodes which are supposed to have a label of + true that are labeled as such by the labeler. + - R(1) == The fraction of nodes which are supposed to have a label of + false that are labeled as such by the labeler. + Therefore, if R is [1,1] then the labeler makes perfect predictions while + an R of [0,0] indicates that it gets everything wrong. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename graph_labeler, + typename graph_type + > + matrix<double,1,2> test_graph_labeling_function ( + const graph_labeler& labeler, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + const std::vector<std::vector<double> >& losses + ); + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + - graph_labeler == an object with an interface compatible with the + dlib::graph_labeler object. + - the following must be a valid expression: labeler(samples[0]); + - if (losses.size() != 0) then + - sizes_match(labels, losses) == true + - all_values_are_nonnegative(losses) == true + ensures + - This overload of test_graph_labeling_function() does the same thing as the + one defined above, except that instead of counting 1 for each labeling + mistake, it weights each mistake according to the corresponding value in + losses. That is, instead of counting a value of 1 for making a mistake on + samples[i].node(j), this routine counts a value of losses[i][j]. Under this + interpretation, the loss values represent how useful it is to correctly label + each node. Therefore, the values returned represent fractions of overall + labeling utility rather than raw labeling accuracy. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename graph_type + > + matrix<double,1,2> cross_validate_graph_labeling_trainer ( + const trainer_type& trainer, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + const long folds + ); + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + - 1 < folds <= samples.size() + - trainer_type == an object which trains some kind of graph labeler object + (e.g. 
structural_graph_labeling_trainer) + ensures + - performs k-fold cross validation by using the given trainer to solve the + given graph labeling problem for the given number of folds. Each fold + is tested using the output of the trainer and the average classification + accuracy from all folds is returned. In particular, this function returns + a matrix R such that: + - R(0) == The fraction of nodes which are supposed to have a label of + true that are labeled as such by the learned labeler. + - R(1) == The fraction of nodes which are supposed to have a label of + false that are labeled as such by the learned labeler. + Therefore, if R is [1,1] then the labeler makes perfect predictions while + an R of [0,0] indicates that it gets everything wrong. + - The number of folds used is given by the folds argument. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename graph_type + > + matrix<double,1,2> cross_validate_graph_labeling_trainer ( + const trainer_type& trainer, + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + const std::vector<std::vector<double> >& losses, + const long folds + ); + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + - 1 < folds <= samples.size() + - trainer_type == an object which trains some kind of graph labeler object + (e.g. structural_graph_labeling_trainer) + - if (losses.size() != 0) then + - sizes_match(labels, losses) == true + - all_values_are_nonnegative(losses) == true + ensures + - This overload of cross_validate_graph_labeling_trainer() does the same thing + as the one defined above, except that instead of counting 1 for each labeling + mistake, it weights each mistake according to the corresponding value in + losses. That is, instead of counting a value of 1 for making a mistake on + samples[i].node(j), this routine counts a value of losses[i][j]. 
Under this + interpretation, the loss values represent how useful it is to correctly label + each node. Therefore, the values returned represent fractions of overall + labeling utility rather than raw labeling accuracy. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_GRAPh_LABELING_TRAINER_ABSTRACT_Hh_ + + + diff --git a/ml/dlib/dlib/svm/cross_validate_multiclass_trainer.h b/ml/dlib/dlib/svm/cross_validate_multiclass_trainer.h new file mode 100644 index 000000000..be8fa3f3f --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_multiclass_trainer.h @@ -0,0 +1,208 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_Hh_ +#define DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_Hh_ + +#include <vector> +#include "../matrix.h" +#include "cross_validate_multiclass_trainer_abstract.h" +#include <sstream> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename dec_funct_type, + typename sample_type, + typename label_type + > + const matrix<double> test_multiclass_decision_function ( + const dec_funct_type& dec_funct, + const std::vector<sample_type>& x_test, + const std::vector<label_type>& y_test + ) + { + + // make sure requires clause is not broken + DLIB_ASSERT( is_learning_problem(x_test,y_test) == true, + "\tmatrix test_multiclass_decision_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_learning_problem(x_test,y_test): " + << is_learning_problem(x_test,y_test)); + + + const std::vector<label_type> all_labels = dec_funct.get_labels(); + + // make a lookup table that maps from labels to their index in all_labels + std::map<label_type,unsigned long> label_to_int; + for (unsigned long i = 0; i < all_labels.size(); ++i) + label_to_int[all_labels[i]] = 
i; + + matrix<double, 0, 0, typename dec_funct_type::mem_manager_type> res; + res.set_size(all_labels.size(), all_labels.size()); + + res = 0; + + typename std::map<label_type,unsigned long>::const_iterator iter; + + // now test this trained object + for (unsigned long i = 0; i < x_test.size(); ++i) + { + iter = label_to_int.find(y_test[i]); + // ignore samples with labels that the decision function doesn't know about. + if (iter == label_to_int.end()) + continue; + + const unsigned long truth = iter->second; + const unsigned long pred = label_to_int[dec_funct(x_test[i])]; + + res(truth,pred) += 1; + } + + return res; + } + +// ---------------------------------------------------------------------------------------- + + class cross_validation_error : public dlib::error + { + public: + cross_validation_error(const std::string& msg) : dlib::error(msg){}; + }; + + template < + typename trainer_type, + typename sample_type, + typename label_type + > + const matrix<double> cross_validate_multiclass_trainer ( + const trainer_type& trainer, + const std::vector<sample_type>& x, + const std::vector<label_type>& y, + const long folds + ) + { + typedef typename trainer_type::mem_manager_type mem_manager_type; + + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y) == true && + 1 < folds && folds <= static_cast<long>(x.size()), + "\tmatrix cross_validate_multiclass_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t x.size(): " << x.size() + << "\n\t folds: " << folds + << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) + ); + + const std::vector<label_type> all_labels = select_all_distinct_labels(y); + + // count the number of times each label shows up + std::map<label_type,long> label_counts; + for (unsigned long i = 0; i < y.size(); ++i) + label_counts[y[i]] += 1; + + + // figure out how many samples from each class will be in the test and train splits + std::map<label_type,long> num_in_test, num_in_train; 
+ for (typename std::map<label_type,long>::iterator i = label_counts.begin(); i != label_counts.end(); ++i) + { + const long in_test = i->second/folds; + if (in_test == 0) + { + std::ostringstream sout; + sout << "In dlib::cross_validate_multiclass_trainer(), the number of folds was larger" << std::endl; + sout << "than the number of elements of one of the training classes." << std::endl; + sout << " folds: "<< folds << std::endl; + sout << " size of class " << i->first << ": "<< i->second << std::endl; + throw cross_validation_error(sout.str()); + } + num_in_test[i->first] = in_test; + num_in_train[i->first] = i->second - in_test; + } + + + + std::vector<sample_type> x_test, x_train; + std::vector<label_type> y_test, y_train; + + matrix<double, 0, 0, mem_manager_type> res; + + std::map<label_type,long> next_test_idx; + for (unsigned long i = 0; i < all_labels.size(); ++i) + next_test_idx[all_labels[i]] = 0; + + label_type label; + + for (long i = 0; i < folds; ++i) + { + x_test.clear(); + y_test.clear(); + x_train.clear(); + y_train.clear(); + + // load up the test samples + for (unsigned long j = 0; j < all_labels.size(); ++j) + { + label = all_labels[j]; + long next = next_test_idx[label]; + + long cur = 0; + const long num_needed = num_in_test[label]; + while (cur < num_needed) + { + if (y[next] == label) + { + x_test.push_back(x[next]); + y_test.push_back(label); + ++cur; + } + next = (next + 1)%x.size(); + } + + next_test_idx[label] = next; + } + + // load up the training samples + for (unsigned long j = 0; j < all_labels.size(); ++j) + { + label = all_labels[j]; + long next = next_test_idx[label]; + + long cur = 0; + const long num_needed = num_in_train[label]; + while (cur < num_needed) + { + if (y[next] == label) + { + x_train.push_back(x[next]); + y_train.push_back(label); + ++cur; + } + next = (next + 1)%x.size(); + } + } + + + try + { + // do the training and testing + res += 
test_multiclass_decision_function(trainer.train(x_train,y_train),x_test,y_test); + } + catch (invalid_nu_error&) + { + // just ignore cases which result in an invalid nu + } + + } // for (long i = 0; i < folds; ++i) + + return res; + } + +} + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_Hh_ + diff --git a/ml/dlib/dlib/svm/cross_validate_multiclass_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_multiclass_trainer_abstract.h new file mode 100644 index 000000000..f84503cdc --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_multiclass_trainer_abstract.h @@ -0,0 +1,99 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_ABSTRACT_Hh_ +#ifdef DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename dec_funct_type, + typename sample_type, + typename label_type + > + const matrix<double> test_multiclass_decision_function ( + const dec_funct_type& dec_funct, + const std::vector<sample_type>& x_test, + const std::vector<label_type>& y_test + ); + /*! + requires + - is_learning_problem(x_test, y_test) + - dec_funct_type == some kind of multiclass decision function object + (e.g. one_vs_one_decision_function) + ensures + - Tests dec_funct against the given samples in x_test and labels in y_test + and returns a confusion matrix summarizing the results. + - let L = dec_funct.get_labels(). Then the confusion matrix C returned + by this function has the following properties. + - C.nr() == C.nc() == L.size() + - C(r,c) == the number of times a sample with label L(r) was predicted + to have a label of L(c) + - Any samples with a y_test value not in L are ignored. 
              That is, samples with labels the decision function hasn't ever seen
              before are ignored.
    !*/

// ----------------------------------------------------------------------------------------

    class cross_validation_error : public dlib::error
    {
        /*!
            This is the exception class used by the cross_validate_multiclass_trainer()
            routine.
        !*/
    };

// ----------------------------------------------------------------------------------------

    template <
        typename trainer_type,
        typename sample_type,
        typename label_type
        >
    const matrix<double> cross_validate_multiclass_trainer (
        const trainer_type& trainer,
        const std::vector<sample_type>& x,
        const std::vector<label_type>& y,
        const long folds
    );
    /*!
        requires
            - is_learning_problem(x,y)
            - 1 < folds <= x.size()
            - trainer_type == some kind of multiclass classification trainer object (e.g. one_vs_one_trainer)
        ensures
            - performs k-fold cross validation by using the given trainer to solve the
              given multiclass classification problem for the given number of folds.
              Each fold is tested using the output of the trainer and the confusion
              matrix from all folds is summed and returned.
            - The total confusion matrix is computed by running test_multiclass_decision_function()
              on each fold and summing its output.
            - The number of folds used is given by the folds argument.
            - let L = select_all_distinct_labels(y).  Then the confusion matrix C returned
              by this function has the following properties.
                - C.nr() == C.nc() == L.size()
                - C(r,c) == the number of times a sample with label L(r) was predicted
                  to have a label of L(c)

              Note that sum(C) might be slightly less than x.size().  This happens if the
              number of samples in a class is not an even multiple of folds.  This is
              because each fold has the same number of test samples in it and so if the
              number of samples in a class isn't a multiple of folds then a few are not
              tested.
+ throws + - cross_validation_error + This exception is thrown if one of the classes has fewer samples than + the number of requested folds. + !*/ + +} + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_CROSS_VALIDATE_MULTICLASS_TRaINER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_object_detection_trainer.h b/ml/dlib/dlib/svm/cross_validate_object_detection_trainer.h new file mode 100644 index 000000000..7cb38f0b7 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_object_detection_trainer.h @@ -0,0 +1,430 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_Hh_ +#define DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_Hh_ + +#include "cross_validate_object_detection_trainer_abstract.h" +#include <vector> +#include "../matrix.h" +#include "svm.h" +#include "../geometry.h" +#include "../image_processing/full_object_detection.h" +#include "../image_processing/box_overlap_testing.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + inline unsigned long number_of_truth_hits ( + const std::vector<full_object_detection>& truth_boxes, + const std::vector<rectangle>& ignore, + const std::vector<std::pair<double,rectangle> >& boxes, + const test_box_overlap& overlap_tester, + std::vector<std::pair<double,bool> >& all_dets, + unsigned long& missing_detections, + const test_box_overlap& overlaps_ignore_tester + ) + /*! + ensures + - returns the number of elements in truth_boxes which are overlapped by an + element of boxes. In this context, two boxes, A and B, overlap if and only if + overlap_tester(A,B) == true. + - No element of boxes is allowed to account for more than one element of truth_boxes. 
+ - The returned number is in the range [0,truth_boxes.size()] + - Adds the score for each box from boxes into all_dets and labels each with + a bool indicating if it hit a truth box. Note that we skip boxes that + don't hit any truth boxes and match an ignore box. + - Adds the number of truth boxes which didn't have any hits into + missing_detections. + !*/ + { + if (boxes.size() == 0) + { + missing_detections += truth_boxes.size(); + return 0; + } + + unsigned long count = 0; + std::vector<bool> used(boxes.size(),false); + for (unsigned long i = 0; i < truth_boxes.size(); ++i) + { + bool found_match = false; + // Find the first box that hits truth_boxes[i] + for (unsigned long j = 0; j < boxes.size(); ++j) + { + if (used[j]) + continue; + + if (overlap_tester(truth_boxes[i].get_rect(), boxes[j].second)) + { + used[j] = true; + ++count; + found_match = true; + break; + } + } + + if (!found_match) + ++missing_detections; + } + + for (unsigned long i = 0; i < boxes.size(); ++i) + { + // only out put boxes if they match a truth box or are not ignored. 
+ if (used[i] || !overlaps_any_box(overlaps_ignore_tester, ignore, boxes[i].second)) + { + all_dets.push_back(std::make_pair(boxes[i].first, used[i])); + } + } + + return count; + } + + inline unsigned long number_of_truth_hits ( + const std::vector<full_object_detection>& truth_boxes, + const std::vector<rectangle>& ignore, + const std::vector<std::pair<double,rectangle> >& boxes, + const test_box_overlap& overlap_tester, + std::vector<std::pair<double,bool> >& all_dets, + unsigned long& missing_detections + ) + { + return number_of_truth_hits(truth_boxes, ignore, boxes, overlap_tester, all_dets, missing_detections, overlap_tester); + } + + // ------------------------------------------------------------------------------------ + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + // make sure requires clause is not broken + DLIB_CASSERT( is_learning_problem(images,truth_dets) == true && + ignore.size() == images.size(), + "\t matrix test_object_detection_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_learning_problem(images,truth_dets): " << is_learning_problem(images,truth_dets) + << "\n\t ignore.size(): " << ignore.size() + << "\n\t images.size(): " << images.size() + ); + + + + double correct_hits = 0; + double total_true_targets = 0; + + std::vector<std::pair<double,bool> > all_dets; + unsigned long missing_detections = 0; + + + for (unsigned long i = 0; i < images.size(); ++i) + { + std::vector<std::pair<double,rectangle> > hits; + 
detector(images[i], hits, adjust_threshold); + + correct_hits += impl::number_of_truth_hits(truth_dets[i], ignore[i], hits, overlap_tester, all_dets, missing_detections); + total_true_targets += truth_dets[i].size(); + } + + std::sort(all_dets.rbegin(), all_dets.rend()); + + double precision, recall; + + double total_hits = all_dets.size(); + + if (total_hits == 0) + precision = 1; + else + precision = correct_hits / total_hits; + + if (total_true_targets == 0) + recall = 1; + else + recall = correct_hits / total_true_targets; + + matrix<double, 1, 3> res; + res = precision, recall, average_precision(all_dets, missing_detections); + return res; + } + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + // convert into a list of regular rectangles. 
+ std::vector<std::vector<full_object_detection> > rects(truth_dets.size()); + for (unsigned long i = 0; i < truth_dets.size(); ++i) + { + for (unsigned long j = 0; j < truth_dets[i].size(); ++j) + { + rects[i].push_back(full_object_detection(truth_dets[i][j])); + } + } + + return test_object_detection_function(detector, images, rects, ignore, overlap_tester, adjust_threshold); + } + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + std::vector<std::vector<rectangle> > ignore(images.size()); + return test_object_detection_function(detector,images,truth_dets,ignore, overlap_tester, adjust_threshold); + } + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + std::vector<std::vector<rectangle> > ignore(images.size()); + return test_object_detection_function(detector,images,truth_dets,ignore, overlap_tester, adjust_threshold); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename array_type + > + struct array_subset_helper + { + typedef typename 
array_type::mem_manager_type mem_manager_type; + + array_subset_helper ( + const array_type& array_, + const std::vector<unsigned long>& idx_set_ + ) : + array(array_), + idx_set(idx_set_) + { + } + + unsigned long size() const { return idx_set.size(); } + + typedef typename array_type::type type; + const type& operator[] ( + unsigned long idx + ) const { return array[idx_set[idx]]; } + + private: + const array_type& array; + const std::vector<unsigned long>& idx_set; + }; + + template < + typename T + > + const matrix_op<op_array_to_mat<array_subset_helper<T> > > mat ( + const array_subset_helper<T>& m + ) + { + typedef op_array_to_mat<array_subset_helper<T> > op; + return matrix_op<op>(op(m)); + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + // make sure requires clause is not broken + DLIB_CASSERT( is_learning_problem(images,truth_dets) == true && + ignore.size() == images.size() && + 1 < folds && folds <= static_cast<long>(images.size()), + "\t matrix cross_validate_object_detection_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_learning_problem(images,truth_dets): " << is_learning_problem(images,truth_dets) + << "\n\t folds: "<< folds + << "\n\t ignore.size(): " << ignore.size() + << "\n\t images.size(): " << images.size() + ); + + double correct_hits = 0; + double total_true_targets = 0; + + const long test_size = images.size()/folds; + + std::vector<std::pair<double,bool> > all_dets; + unsigned long missing_detections = 
0; + unsigned long test_idx = 0; + for (long iter = 0; iter < folds; ++iter) + { + std::vector<unsigned long> train_idx_set; + std::vector<unsigned long> test_idx_set; + + for (long i = 0; i < test_size; ++i) + test_idx_set.push_back(test_idx++); + + unsigned long train_idx = test_idx%images.size(); + std::vector<std::vector<full_object_detection> > training_rects; + std::vector<std::vector<rectangle> > training_ignores; + for (unsigned long i = 0; i < images.size()-test_size; ++i) + { + training_rects.push_back(truth_dets[train_idx]); + training_ignores.push_back(ignore[train_idx]); + train_idx_set.push_back(train_idx); + train_idx = (train_idx+1)%images.size(); + } + + + impl::array_subset_helper<image_array_type> array_subset(images, train_idx_set); + typename trainer_type::trained_function_type detector = trainer.train(array_subset, training_rects, training_ignores, overlap_tester); + for (unsigned long i = 0; i < test_idx_set.size(); ++i) + { + std::vector<std::pair<double,rectangle> > hits; + detector(images[test_idx_set[i]], hits, adjust_threshold); + + correct_hits += impl::number_of_truth_hits(truth_dets[test_idx_set[i]], ignore[i], hits, overlap_tester, all_dets, missing_detections); + total_true_targets += truth_dets[test_idx_set[i]].size(); + } + + } + + std::sort(all_dets.rbegin(), all_dets.rend()); + + + double precision, recall; + + double total_hits = all_dets.size(); + + if (total_hits == 0) + precision = 1; + else + precision = correct_hits / total_hits; + + if (total_true_targets == 0) + recall = 1; + else + recall = correct_hits / total_true_targets; + + matrix<double, 1, 3> res; + res = precision, recall, average_precision(all_dets, missing_detections); + return res; + } + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const 
std::vector<std::vector<rectangle> >& ignore, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + // convert into a list of regular rectangles. + std::vector<std::vector<full_object_detection> > dets(truth_dets.size()); + for (unsigned long i = 0; i < truth_dets.size(); ++i) + { + for (unsigned long j = 0; j < truth_dets[i].size(); ++j) + { + dets[i].push_back(full_object_detection(truth_dets[i][j])); + } + } + + return cross_validate_object_detection_trainer(trainer, images, dets, ignore, folds, overlap_tester, adjust_threshold); + } + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + const std::vector<std::vector<rectangle> > ignore(images.size()); + return cross_validate_object_detection_trainer(trainer,images,truth_dets,ignore,folds,overlap_tester,adjust_threshold); + } + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ) + { + const std::vector<std::vector<rectangle> > ignore(images.size()); + return cross_validate_object_detection_trainer(trainer,images,truth_dets,ignore,folds,overlap_tester,adjust_threshold); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_Hh_ + diff --git 
a/ml/dlib/dlib/svm/cross_validate_object_detection_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_object_detection_trainer_abstract.h new file mode 100644 index 000000000..575ed77fb --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_object_detection_trainer_abstract.h @@ -0,0 +1,297 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_ABSTRACT_Hh_ +#ifdef DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" +#include "../geometry.h" +#include "../image_processing/full_object_detection_abstract.h" +#include "../dnn/layers_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - is_learning_problem(images,truth_dets) + - images.size() == ignore.size() + - object_detector_type == some kind of object detector function object + (e.g. object_detector) + - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h + and it must contain objects which can be accepted by detector(). + ensures + - Tests the given detector against the supplied object detection problem and + returns the precision, recall, and average precision. Note that the task is + to predict, for each images[i], the set of object locations given by + truth_dets[i]. Additionally, any detections on image[i] that match a box in + ignore[i] are ignored. 
That is, detections matching a box in ignore[i] do + not count as a false alarm and similarly if any element of ignore[i] goes + undetected it does not count as a missed detection. So we say that ignore[i] + contains a set of boxes that we "don't care" if they are detected or not. + - In particular, returns a matrix M such that: + - M(0) == the precision of the detector object. This is a number + in the range [0,1] which measures the fraction of detector outputs + which correspond to a real target. A value of 1 means the detector + never produces any false alarms while a value of 0 means it only + produces false alarms. + - M(1) == the recall of the detector object. This is a number in the + range [0,1] which measures the fraction of targets found by the + detector. A value of 1 means the detector found all the targets + in truth_dets while a value of 0 means the detector didn't locate + any of the targets. + - M(2) == the average precision of the detector object. This is a number + in the range [0,1] which measures the overall quality of the detector. + We compute this by taking all the detections output by the detector and + ordering them in descending order of their detection scores. Then we use + the average_precision() routine to score the ranked listing and store the + output into M(2). + - This function considers a detector output D to match a rectangle T if and + only if overlap_tester(T,D) returns true. + - Note that you can use the adjust_threshold argument to raise or lower the + detection threshold. This value is passed into the identically named + argument to the detector object and therefore influences the number of + output detections. It can be useful, for example, to lower the detection + threshold because it results in more detections being output by the + detector, and therefore provides more information in the ranking, + possibly raising the average precision. 
+ !*/ + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - All the requirements of the above test_object_detection_function() routine. + ensures + - converts all the rectangles in truth_dets into full_object_detection objects + via full_object_detection's rectangle constructor. Then invokes + test_object_detection_function() on the full_object_detections and returns + the results. + !*/ + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - All the requirements of the above test_object_detection_function() routine. + ensures + - This function simply invokes test_object_detection_function() with all the + given arguments and an empty set of ignore rectangles and returns the results. + !*/ + + template < + typename object_detector_type, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + object_detector_type& detector, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - All the requirements of the above test_object_detection_function() routine. 
+ ensures + - This function simply invokes test_object_detection_function() with all the + given arguments and an empty set of ignore rectangles and returns the results. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename SUBNET, + typename image_array_type + > + const matrix<double,1,3> test_object_detection_function ( + loss_mmod<SUBNET>& detector, + const image_array_type& images, + const std::vector<std::vector<mmod_rect>>& truth_dets, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0, + const test_box_overlap& overlaps_ignore_tester = test_box_overlap() + ); + /*! + requires + - is_learning_problem(images,truth_dets) + - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h + and it must contain objects which can be accepted by detector(). + ensures + - This function is just like the test_object_detection_function() for + object_detector's except it runs on CNNs that use loss_mmod. + - Tests the given detector against the supplied object detection problem and + returns the precision, recall, and average precision. Note that the task is + to predict, for each images[i], the set of object locations, and their + corresponding labels, given by truth_dets[i]. Additionally, any detections + on image[i] that match a box in truth_dets[i] that are marked ignore are + ignored. That is, detections matching an ignore box, regardless of the + ignore box's label, do not count as a false alarm and similarly if any + ignored box in truth_dets goes undetected it does not count as a missed + detection. To test if a box overlaps an ignore box, we use overlaps_ignore_tester. + - In particular, returns a matrix M such that: + - M(0) == the precision of the detector object. This is a number + in the range [0,1] which measures the fraction of detector outputs + which correspond to a real target. 
A value of 1 means the detector + never produces any false alarms while a value of 0 means it only + produces false alarms. + - M(1) == the recall of the detector object. This is a number in the + range [0,1] which measures the fraction of targets found by the detector. + A value of 1 means the detector found all the non-ignore targets in + truth_dets while a value of 0 means the detector didn't locate any of the + targets. + - M(2) == the average precision of the detector object. This is a number + in the range [0,1] which measures the overall quality of the detector. + We compute this by taking all the detections output by the detector and + ordering them in descending order of their detection scores. Then we use + the average_precision() routine to score the ranked listing and store the + output into M(2). + - This function considers a detector output D to match a truth rectangle T if + and only if overlap_tester(T,D) returns true and the labels are identical strings. + - Note that you can use the adjust_threshold argument to raise or lower the + detection threshold. This value is passed into the identically named + argument to the detector object and therefore influences the number of + output detections. It can be useful, for example, to lower the detection + threshold because it results in more detections being output by the + detector, and therefore provides more information in the ranking, + possibly raising the average precision. 
+ !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - is_learning_problem(images,truth_dets) + - images.size() == ignore.size() + - 1 < folds <= images.size() + - trainer_type == some kind of object detection trainer (e.g structural_object_detection_trainer) + - image_array_type must be an implementation of dlib/array/array_kernel_abstract.h + and it must contain objects which can be accepted by detector(). + - it is legal to call trainer.train(images, truth_dets) + ensures + - Performs k-fold cross-validation by using the given trainer to solve an + object detection problem for the given number of folds. Each fold is tested + using the output of the trainer and a matrix summarizing the results is + returned. The matrix contains the precision, recall, and average + precision of the trained detectors and is defined identically to the + test_object_detection_function() routine defined at the top of this file. 
+ !*/ + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const std::vector<std::vector<rectangle> >& ignore, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - all the requirements of the above cross_validate_object_detection_trainer() routine. + ensures + - converts all the rectangles in truth_dets into full_object_detection objects + via full_object_detection's rectangle constructor. Then invokes + cross_validate_object_detection_trainer() on the full_object_detections and + returns the results. + !*/ + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_dets, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! + requires + - All the requirements of the above cross_validate_object_detection_trainer() routine. + ensures + - This function simply invokes cross_validate_object_detection_trainer() with all + the given arguments and an empty set of ignore rectangles and returns the results. + !*/ + + template < + typename trainer_type, + typename image_array_type + > + const matrix<double,1,3> cross_validate_object_detection_trainer ( + const trainer_type& trainer, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_dets, + const long folds, + const test_box_overlap& overlap_tester = test_box_overlap(), + const double adjust_threshold = 0 + ); + /*! 
+ requires + - All the requirements of the above cross_validate_object_detection_trainer() routine. + ensures + - This function simply invokes cross_validate_object_detection_trainer() with all + the given arguments and an empty set of ignore rectangles and returns the results. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_OBJECT_DETECTION_TRaINER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_regression_trainer.h b/ml/dlib/dlib/svm/cross_validate_regression_trainer.h new file mode 100644 index 000000000..a4c6077c9 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_regression_trainer.h @@ -0,0 +1,155 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_Hh_ +#define DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_Hh_ + +#include <vector> +#include "../matrix.h" +#include "../statistics.h" +#include "cross_validate_regression_trainer_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename reg_funct_type, + typename sample_type, + typename label_type + > + matrix<double,1,4> + test_regression_function ( + reg_funct_type& reg_funct, + const std::vector<sample_type>& x_test, + const std::vector<label_type>& y_test + ) + { + + // make sure requires clause is not broken + DLIB_ASSERT( is_learning_problem(x_test,y_test) == true, + "\tmatrix test_regression_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_learning_problem(x_test,y_test): " + << is_learning_problem(x_test,y_test)); + + running_stats<double> rs, rs_mae; + running_scalar_covariance<double> rc; + + for (unsigned long i = 0; i < x_test.size(); ++i) + { + // compute error + const double output = reg_funct(x_test[i]); + const double temp = output - y_test[i]; + + 
rs_mae.add(std::abs(temp)); + rs.add(temp*temp); + rc.add(output, y_test[i]); + } + + matrix<double,1,4> result; + result = rs.mean(), rc.correlation(), rs_mae.mean(), rs_mae.stddev(); + return result; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename sample_type, + typename label_type + > + matrix<double,1,4> + cross_validate_regression_trainer ( + const trainer_type& trainer, + const std::vector<sample_type>& x, + const std::vector<label_type>& y, + const long folds + ) + { + + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y) == true && + 1 < folds && folds <= static_cast<long>(x.size()), + "\tmatrix cross_validate_regression_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t x.size(): " << x.size() + << "\n\t folds: " << folds + << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) + ); + + + + const long num_in_test = x.size()/folds; + const long num_in_train = x.size() - num_in_test; + + running_stats<double> rs, rs_mae; + running_scalar_covariance<double> rc; + + std::vector<sample_type> x_test, x_train; + std::vector<label_type> y_test, y_train; + + + long next_test_idx = 0; + + + for (long i = 0; i < folds; ++i) + { + x_test.clear(); + y_test.clear(); + x_train.clear(); + y_train.clear(); + + // load up the test samples + for (long cnt = 0; cnt < num_in_test; ++cnt) + { + x_test.push_back(x[next_test_idx]); + y_test.push_back(y[next_test_idx]); + next_test_idx = (next_test_idx + 1)%x.size(); + } + + // load up the training samples + long next = next_test_idx; + for (long cnt = 0; cnt < num_in_train; ++cnt) + { + x_train.push_back(x[next]); + y_train.push_back(y[next]); + next = (next + 1)%x.size(); + } + + + try + { + const typename trainer_type::trained_function_type& df = trainer.train(x_train,y_train); + + // do the training and testing + for (unsigned long j = 0; j < x_test.size(); ++j) 
+ { + // compute error + const double output = df(x_test[j]); + const double temp = output - y_test[j]; + + rs_mae.add(std::abs(temp)); + rs.add(temp*temp); + rc.add(output, y_test[j]); + } + } + catch (invalid_nu_error&) + { + // just ignore cases which result in an invalid nu + } + + } // for (long i = 0; i < folds; ++i) + + matrix<double,1,4> result; + result = rs.mean(), rc.correlation(), rs_mae.mean(), rs_mae.stddev(); + return result; + } + +} + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_regression_trainer_abstract.h b/ml/dlib/dlib/svm/cross_validate_regression_trainer_abstract.h new file mode 100644 index 000000000..d6298aa74 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_regression_trainer_abstract.h @@ -0,0 +1,82 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_ABSTRACT_Hh_ +#ifdef DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_ABSTRACT_Hh_ + +#include <vector> +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename reg_funct_type, + typename sample_type, + typename label_type + > + matrix<double,1,4> + test_regression_function ( + reg_funct_type& reg_funct, + const std::vector<sample_type>& x_test, + const std::vector<label_type>& y_test + ); + /*! + requires + - is_learning_problem(x_test, y_test) + - reg_funct_type == some kind of regression function object + (e.g. a decision_function created by the svr_trainer ) + ensures + - Tests reg_funct against the given samples in x_test and target values in + y_test and returns a matrix M summarizing the results. Specifically: + - M(0) == the mean squared error. 
+ The MSE is given by: sum over i: pow(reg_funct(x_test[i]) - y_test[i], 2.0) + - M(1) == the correlation between reg_funct(x_test[i]) and y_test[i]. + This is a number between -1 and 1. + - M(2) == the mean absolute error. + This is given by: sum over i: abs(reg_funct(x_test[i]) - y_test[i]) + - M(3) == the standard deviation of the absolute error. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename sample_type, + typename label_type + > + matrix<double,1,4> + cross_validate_regression_trainer ( + const trainer_type& trainer, + const std::vector<sample_type>& x, + const std::vector<label_type>& y, + const long folds + ); + /*! + requires + - is_learning_problem(x,y) + - 1 < folds <= x.size() + - trainer_type == some kind of regression trainer object (e.g. svr_trainer) + ensures + - Performs k-fold cross validation by using the given trainer to solve a + regression problem for the given number of folds. Each fold is tested using + the output of the trainer. A matrix M summarizing the results is returned. + Specifically: + - M(0) == the mean squared error. + The MSE is given by: sum over i: pow(reg_funct(x[i]) - y[i], 2.0) + - M(1) == the correlation between a predicted y value and its true value. + This is a number between -1 and 1. + - M(2) == the mean absolute error. + This is given by: sum over i: abs(reg_funct(x_test[i]) - y_test[i]) + - M(3) == the standard deviation of the absolute error. + !*/ + +} + +// ---------------------------------------------------------------------------------------- + +#endif // DLIB_CROSS_VALIDATE_REGRESSION_TRaINER_ABSTRACT_Hh_ + + + diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_labeler.h b/ml/dlib/dlib/svm/cross_validate_sequence_labeler.h new file mode 100644 index 000000000..75c4e363a --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_sequence_labeler.h @@ -0,0 +1,152 @@ +// Copyright (C) 2011 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_Hh_ +#define DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_Hh_ + +#include "cross_validate_sequence_labeler_abstract.h" +#include <vector> +#include "../matrix.h" +#include "svm.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename sequence_labeler_type, + typename sequence_type + > + const matrix<double> test_sequence_labeler ( + const sequence_labeler_type& labeler, + const std::vector<sequence_type>& samples, + const std::vector<std::vector<unsigned long> >& labels + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_sequence_labeling_problem(samples, labels) == true, + "\tmatrix test_sequence_labeler()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_sequence_labeling_problem(samples, labels): " + << is_sequence_labeling_problem(samples, labels)); + + matrix<double> res(labeler.num_labels(), labeler.num_labels()); + res = 0; + + std::vector<unsigned long> pred; + for (unsigned long i = 0; i < samples.size(); ++i) + { + labeler.label_sequence(samples[i], pred); + + for (unsigned long j = 0; j < pred.size(); ++j) + { + const unsigned long truth = labels[i][j]; + if (truth >= static_cast<unsigned long>(res.nr())) + { + // ignore labels the labeler doesn't know about. 
+ continue; + } + + res(truth, pred[j]) += 1; + } + } + + return res; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename sequence_type + > + const matrix<double> cross_validate_sequence_labeler ( + const trainer_type& trainer, + const std::vector<sequence_type>& samples, + const std::vector<std::vector<unsigned long> >& labels, + const long folds + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_sequence_labeling_problem(samples,labels) == true && + 1 < folds && folds <= static_cast<long>(samples.size()), + "\tmatrix cross_validate_sequence_labeler()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t folds: " << folds + << "\n\t is_sequence_labeling_problem(samples,labels): " << is_sequence_labeling_problem(samples,labels) + ); + +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < labels.size(); ++i) + { + for (unsigned long j = 0; j < labels[i].size(); ++j) + { + // make sure requires clause is not broken + DLIB_ASSERT(labels[i][j] < trainer.num_labels(), + "\t matrix cross_validate_sequence_labeler()" + << "\n\t The labels are invalid." 
+ << "\n\t labels[i][j]: " << labels[i][j] + << "\n\t trainer.num_labels(): " << trainer.num_labels() + << "\n\t i: " << i + << "\n\t j: " << j + ); + } + } +#endif + + + + + const long num_in_test = samples.size()/folds; + const long num_in_train = samples.size() - num_in_test; + + std::vector<sequence_type> x_test, x_train; + std::vector<std::vector<unsigned long> > y_test, y_train; + + + long next_test_idx = 0; + + matrix<double> res; + + + for (long i = 0; i < folds; ++i) + { + x_test.clear(); + y_test.clear(); + x_train.clear(); + y_train.clear(); + + // load up the test samples + for (long cnt = 0; cnt < num_in_test; ++cnt) + { + x_test.push_back(samples[next_test_idx]); + y_test.push_back(labels[next_test_idx]); + next_test_idx = (next_test_idx + 1)%samples.size(); + } + + // load up the training samples + long next = next_test_idx; + for (long cnt = 0; cnt < num_in_train; ++cnt) + { + x_train.push_back(samples[next]); + y_train.push_back(labels[next]); + next = (next + 1)%samples.size(); + } + + + res += test_sequence_labeler(trainer.train(x_train,y_train), x_test, y_test); + + } // for (long i = 0; i < folds; ++i) + + return res; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_labeler_abstract.h b/ml/dlib/dlib/svm/cross_validate_sequence_labeler_abstract.h new file mode 100644 index 000000000..3d2409b28 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_sequence_labeler_abstract.h @@ -0,0 +1,83 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
// Documentation-only header: the #undef/#ifdef pair below guarantees the guard
// symbol is never defined when this file is read, so nothing here is ever
// compiled.  It exists purely to document the interfaces implemented in
// cross_validate_sequence_labeler.h.
#undef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_Hh_
#ifdef DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_Hh_

#include <vector>
#include "../matrix.h"
#include "svm.h"


namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename sequence_labeler_type,
        typename sequence_type
        >
    const matrix<double> test_sequence_labeler (
        const sequence_labeler_type& labeler,
        const std::vector<sequence_type>& samples,
        const std::vector<std::vector<unsigned long> >& labels
    );
    /*!
        requires
            - is_sequence_labeling_problem(samples, labels)
            - sequence_labeler_type == dlib::sequence_labeler or an object with a
              compatible interface.
        ensures
            - Tests labeler against the given samples and labels and returns a confusion
              matrix summarizing the results.
            - The confusion matrix C returned by this function has the following properties.
                - C.nc() == labeler.num_labels()
                - C.nr() == labeler.num_labels()
                - C(T,P) == the number of times a sequence element with label T was predicted
                  to have a label of P.
            - Any samples with a label value >= labeler.num_labels() are ignored.  That
              is, samples with labels the labeler hasn't ever seen before are ignored.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename trainer_type,
        typename sequence_type
        >
    const matrix<double> cross_validate_sequence_labeler (
        const trainer_type& trainer,
        const std::vector<sequence_type>& samples,
        const std::vector<std::vector<unsigned long> >& labels,
        const long folds
    );
    /*!
        requires
            - is_sequence_labeling_problem(samples, labels)
            - 1 < folds <= samples.size()
            - for all valid i and j: labels[i][j] < trainer.num_labels()
            - trainer_type == dlib::structural_sequence_labeling_trainer or an object
              with a compatible interface.
        ensures
            - performs k-fold cross validation by using the given trainer to solve the
              given sequence labeling problem for the given number of folds.  Each fold
              is tested using the output of the trainer and the confusion matrix from all
              folds is summed and returned.
            - The total confusion matrix is computed by running test_sequence_labeler()
              on each fold and summing its output.
            - The number of folds used is given by the folds argument.
            - The confusion matrix C returned by this function has the following properties.
                - C.nc() == trainer.num_labels()
                - C.nr() == trainer.num_labels()
                - C(T,P) == the number of times a sequence element with label T was predicted
                  to have a label of P.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_CROSS_VALIDATE_SEQUENCE_LABeLER_ABSTRACT_Hh_


// ---- next file: ml/dlib/dlib/svm/cross_validate_sequence_segmenter.h ----
// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_Hh_ +#define DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_Hh_ + +#include "cross_validate_sequence_segmenter_abstract.h" +#include "sequence_segmenter.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename sequence_segmenter_type, + typename sequence_type + > + const matrix<double,1,3> raw_metrics_test_sequence_segmenter ( + const sequence_segmenter_type& segmenter, + const std::vector<sequence_type>& samples, + const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments + ) + { + std::vector<std::pair<unsigned long,unsigned long> > truth; + std::vector<std::pair<unsigned long,unsigned long> > pred; + + double true_hits = 0; + double total_detections = 0; + double total_true_segments = 0; + + for (unsigned long i = 0; i < samples.size(); ++i) + { + segmenter.segment_sequence(samples[i], pred); + truth = segments[i]; + // sort the segments so they will be in the same orders + std::sort(truth.begin(), truth.end()); + std::sort(pred.begin(), pred.end()); + + total_true_segments += truth.size(); + total_detections += pred.size(); + + unsigned long j=0,k=0; + while (j < pred.size() && k < truth.size()) + { + if (pred[j].first == truth[k].first && + pred[j].second == truth[k].second) + { + ++true_hits; + ++j; + ++k; + } + else if (pred[j].first < truth[k].first) + { + ++j; + } + else + { + ++k; + } + } + } + + matrix<double,1,3> res; + res = total_detections, total_true_segments, true_hits; + return res; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename sequence_segmenter_type, + typename sequence_type + > + const matrix<double,1,3> test_sequence_segmenter ( + const sequence_segmenter_type& segmenter, + const std::vector<sequence_type>& samples, + const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& 
segments + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_sequence_segmentation_problem(samples, segments) == true, + "\tmatrix test_sequence_segmenter()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_sequence_segmentation_problem(samples, segments): " + << is_sequence_segmentation_problem(samples, segments)); + + const matrix<double,1,3> metrics = impl::raw_metrics_test_sequence_segmenter(segmenter, samples, segments); + + const double total_detections = metrics(0); + const double total_true_segments = metrics(1); + const double true_hits = metrics(2); + + const double precision = (total_detections ==0) ? 1 : true_hits/total_detections; + const double recall = (total_true_segments==0) ? 1 : true_hits/total_true_segments; + const double f1 = (precision+recall ==0) ? 0 : 2*precision*recall/(precision+recall); + + matrix<double,1,3> res; + res = precision, recall, f1; + return res; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename sequence_type + > + const matrix<double,1,3> cross_validate_sequence_segmenter ( + const trainer_type& trainer, + const std::vector<sequence_type>& samples, + const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments, + const long folds + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( is_sequence_segmentation_problem(samples, segments) == true && + 1 < folds && folds <= static_cast<long>(samples.size()), + "\tmatrix cross_validate_sequence_segmenter()" + << "\n\t invalid inputs were given to this function" + << "\n\t folds: " << folds + << "\n\t is_sequence_segmentation_problem(samples, segments): " + << is_sequence_segmentation_problem(samples, segments)); + + + const long num_in_test = samples.size()/folds; + const long num_in_train = samples.size() - num_in_test; + + std::vector<sequence_type> x_test, x_train; + 
std::vector<std::vector<std::pair<unsigned long,unsigned long> > > y_test, y_train; + + long next_test_idx = 0; + + matrix<double,1,3> metrics; + metrics = 0; + + for (long i = 0; i < folds; ++i) + { + x_test.clear(); + y_test.clear(); + x_train.clear(); + y_train.clear(); + + // load up the test samples + for (long cnt = 0; cnt < num_in_test; ++cnt) + { + x_test.push_back(samples[next_test_idx]); + y_test.push_back(segments[next_test_idx]); + next_test_idx = (next_test_idx + 1)%samples.size(); + } + + // load up the training samples + long next = next_test_idx; + for (long cnt = 0; cnt < num_in_train; ++cnt) + { + x_train.push_back(samples[next]); + y_train.push_back(segments[next]); + next = (next + 1)%samples.size(); + } + + + metrics += impl::raw_metrics_test_sequence_segmenter(trainer.train(x_train,y_train), x_test, y_test); + } // for (long i = 0; i < folds; ++i) + + + const double total_detections = metrics(0); + const double total_true_segments = metrics(1); + const double true_hits = metrics(2); + + const double precision = (total_detections ==0) ? 1 : true_hits/total_detections; + const double recall = (total_true_segments==0) ? 1 : true_hits/total_true_segments; + const double f1 = (precision+recall ==0) ? 0 : 2*precision*recall/(precision+recall); + + matrix<double,1,3> res; + res = precision, recall, f1; + return res; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_Hh_ + + diff --git a/ml/dlib/dlib/svm/cross_validate_sequence_segmenter_abstract.h b/ml/dlib/dlib/svm/cross_validate_sequence_segmenter_abstract.h new file mode 100644 index 000000000..87e21d592 --- /dev/null +++ b/ml/dlib/dlib/svm/cross_validate_sequence_segmenter_abstract.h @@ -0,0 +1,80 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
// Documentation-only header: the #undef/#ifdef pair below guarantees the guard
// symbol is never defined when this file is read, so nothing here is ever
// compiled.  It exists purely to document the interfaces implemented in
// cross_validate_sequence_segmenter.h.
#undef DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_ABSTRACT_Hh_
#ifdef DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_ABSTRACT_Hh_

#include "sequence_segmenter_abstract.h"
#include "../matrix.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename sequence_segmenter_type,
        typename sequence_type
        >
    const matrix<double,1,3> test_sequence_segmenter (
        const sequence_segmenter_type& segmenter,
        const std::vector<sequence_type>& samples,
        const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments
    );
    /*!
        requires
            - is_sequence_segmentation_problem(samples, segments) == true
            - sequence_segmenter_type == dlib::sequence_segmenter or an object with a
              compatible interface.
        ensures
            - Tests segmenter against the given samples and truth segments and returns the
              precision, recall, and F1-score obtained by the segmenter.  That is, the goal
              of the segmenter should be to predict segments[i] given samples[i] as input.
              The test_sequence_segmenter() routine therefore measures how well the
              segmenter is able to perform this task.
            - Returns a row matrix M with the following properties:
                - M(0) == The precision of the segmenter measured against the task of
                  detecting the segments of each sample.  This is a number in the range 0
                  to 1 and represents the fraction of segments output by the segmenter
                  which correspond to true segments for each sample.
                - M(1) == The recall of the segmenter measured against the task of
                  detecting the segments of each sample.  This is a number in the range 0
                  to 1 and represents the fraction of the true segments found by the
                  segmenter.
                - M(2) == The F1-score for the segmenter.  This is the harmonic mean of
                  M(0) and M(1).
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename trainer_type,
        typename sequence_type
        >
    const matrix<double,1,3> cross_validate_sequence_segmenter (
        const trainer_type& trainer,
        const std::vector<sequence_type>& samples,
        const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments,
        const long folds
    );
    /*!
        requires
            - is_sequence_segmentation_problem(samples, segments) == true
            - 1 < folds <= samples.size()
            - trainer_type == dlib::structural_sequence_segmentation_trainer or an object
              with a compatible interface.
        ensures
            - Performs k-fold cross validation by using the given trainer to solve the
              given sequence segmentation problem for the given number of folds.  Each fold
              is tested using the output of the trainer and the results from all folds are
              summarized and returned.
            - This function returns the precision, recall, and F1-score for the trainer.
              In particular, the output is the same as the output from the
              test_sequence_segmenter() routine defined above.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_CROSS_VALIDATE_SEQUENCE_sEGMENTER_ABSTRACT_Hh_

// ---- next file: ml/dlib/dlib/svm/cross_validate_track_association_trainer.h ----
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_Hh_
#define DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_Hh_

#include "cross_validate_track_association_trainer_abstract.h"
#include "structural_track_association_trainer.h"

namespace dlib
{
// ----------------------------------------------------------------------------------------

    namespace impl
    {
        // Replays one time-ordered set of detection frames through the association
        // function, maintaining the resulting tracks, and accumulates into
        // total_dets/correctly_associated_dets how many detections were routed to
        // the track their truth label says they belong to.
        template <
            typename track_association_function,
            typename detection_type,
            typename label_type
            >
        void test_track_association_function (
            const track_association_function& assoc,
            const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& samples,
            unsigned long& total_dets,
            unsigned long& correctly_associated_dets
        )
        {
            const typename track_association_function::association_function_type& f = assoc.get_assignment_function();

            typedef typename detection_type::track_type track_type;
            // NOTE(review): we are already inside namespace impl, so this using
            // directive looks redundant — presumably kept for name lookup of
            // helpers like get_unlabeled_dets(); confirm before removing.
            using namespace impl;

            dlib::rand rnd;
            std::vector<track_type> tracks;
            std::map<label_type,long> track_idx; // tracks[track_idx[id]] == track with ID id.

            for (unsigned long j = 0; j < samples.size(); ++j)
            {
                // Work on a copy of this frame's detections since we shuffle them.
                std::vector<labeled_detection<detection_type,label_type> > dets = samples[j];
                // Shuffle the order of the detections so we can be sure that there isn't
                // anything funny going on like the detections always coming in the same
                // order relative to their labels and the association function just gets
                // lucky by picking the same assignment ordering every time.  So this way
                // we know the assignment function really is doing something rather than
                // just being lucky.
                randomize_samples(dets, rnd);

                total_dets += dets.size();
                // assignments[k] is the index of the track dets[k] was associated to,
                // or -1 when the function decided dets[k] starts a new track.
                std::vector<long> assignments = f(get_unlabeled_dets(dets), tracks);
                std::vector<bool> updated_track(tracks.size(), false);
                // now update all the tracks with the detections that associated to them.
                for (unsigned long k = 0; k < assignments.size(); ++k)
                {
                    // If the detection is associated to tracks[assignments[k]]
                    if (assignments[k] != -1)
                    {
                        tracks[assignments[k]].update_track(dets[k].det);
                        updated_track[assignments[k]] = true;

                        // if this detection was supposed to go to this track
                        if (track_idx.count(dets[k].label) && track_idx[dets[k].label]==assignments[k])
                            ++correctly_associated_dets;

                        // Either way, the truth label now lives on this track.
                        track_idx[dets[k].label] = assignments[k];
                    }
                    else
                    {
                        track_type new_track;
                        new_track.update_track(dets[k].det);
                        tracks.push_back(new_track);

                        // if this detection was supposed to go to a new track
                        if (track_idx.count(dets[k].label) == 0)
                            ++correctly_associated_dets;

                        track_idx[dets[k].label] = tracks.size()-1;
                    }
                }

                // Now propagate all the tracks that didn't get any detections.
                // (updated_track was sized before any new tracks were pushed, so
                // freshly created tracks are correctly excluded from propagation.)
                for (unsigned long k = 0; k < updated_track.size(); ++k)
                {
                    if (!updated_track[k])
                        tracks[k].propagate_track();
                }
            }
        }
    }

// ----------------------------------------------------------------------------------------

    // Returns the fraction of all detections, across every sequence in samples,
    // that the association function routed to the correct track.
    template <
        typename track_association_function,
        typename detection_type,
        typename label_type
        >
    double test_track_association_function (
        const track_association_function& assoc,
        const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples
    )
    {
        unsigned long total_dets = 0;
        unsigned long correctly_associated_dets = 0;

        for (unsigned long i = 0; i < samples.size(); ++i)
        {
            impl::test_track_association_function(assoc, samples[i], total_dets, correctly_associated_dets);
        }

        // NOTE(review): if samples contains no detections at all this is 0/0;
        // the documented requires clause presumably rules that out — confirm.
        return (double)correctly_associated_dets/(double)total_dets;
    }

// ----------------------------------------------------------------------------------------

    // k-fold cross validation: trains on all sequences outside the current fold
    // and scores association accuracy on the fold's sequences, pooling the counts
    // across folds.
    template <
        typename trainer_type,
        typename detection_type,
        typename label_type
        >
    double cross_validate_track_association_trainer (
        const trainer_type& trainer,
        const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples,
        const long folds
    )
    {
        const long num_in_test  = samples.size()/folds;
        const long num_in_train = samples.size() - num_in_test;

        std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > > samples_train;

        long next_test_idx = 0;
        unsigned long total_dets = 0;
        unsigned long correctly_associated_dets = 0;

        for (long i = 0; i < folds; ++i)
        {
            samples_train.clear();

            // load up the training samples (skip past this fold's test block first)
            long next = (next_test_idx + num_in_test)%samples.size();
            for (long cnt = 0; cnt < num_in_train; ++cnt)
            {
                samples_train.push_back(samples[next]);
                next = (next + 1)%samples.size();
            }

            // Binding the trainer's return value to a const reference extends the
            // temporary's lifetime for the duration of this fold's testing.
            const track_association_function<detection_type>& df = trainer.train(samples_train);
            for (long cnt = 0; cnt < num_in_test; ++cnt)
            {
                impl::test_track_association_function(df, samples[next_test_idx], total_dets, correctly_associated_dets);
                next_test_idx = (next_test_idx + 1)%samples.size();
            }
        }

        // Pooled accuracy over every fold's test detections.
        return (double)correctly_associated_dets/(double)total_dets;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_Hh_


// ---- next file: ml/dlib/dlib/svm/cross_validate_track_association_trainer_abstract.h ----
// Copyright (C) 2014 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
// Documentation-only header: the #undef/#ifdef pair below guarantees the guard
// symbol is never defined when this file is read, so nothing here is ever
// compiled.  It exists purely to document the interfaces implemented in
// cross_validate_track_association_trainer.h.
#undef DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_ABSTRACT_Hh_
#ifdef DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_ABSTRACT_Hh_

#include "structural_track_association_trainer_abstract.h"
#include "svm_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename track_association_function,
        typename detection_type,
        typename label_type
        >
    double test_track_association_function (
        const track_association_function& assoc,
        const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples
    );
    /*!
        requires
            - is_track_association_problem(samples)
            - track_association_function == an instantiation of the dlib::track_association_function
              template or an object with a compatible interface.
        ensures
            - Tests assoc against the given samples and returns the fraction of detections
              which were correctly associated to their tracks.  That is, if assoc produces
              perfect tracks when used then this function returns a value of 1.  Similarly,
              if 5% of the detections were associated to the incorrect track then the
              return value is 0.95.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename trainer_type,
        typename detection_type,
        typename label_type
        >
    double cross_validate_track_association_trainer (
        const trainer_type& trainer,
        const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples,
        const long folds
    );
    /*!
        requires
            - is_track_association_problem(samples)
            - 1 < folds <= samples.size()
            - trainer_type == dlib::structural_track_association_trainer or an object with
              a compatible interface.
        ensures
            - Performs k-fold cross validation by using the given trainer to solve the
              given track association learning problem for the given number of folds.  Each
              fold is tested using the output of the trainer and the fraction of
              correctly associated detections is returned (i.e. this function returns the
              same measure of track association quality as test_track_association_function()).
            - The number of folds used is given by the folds argument.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_CROSS_VALIDATE_TRACK_ASSOCIATION_TrAINER_ABSTRACT_Hh_


// ---- next file: ml/dlib/dlib/svm/empirical_kernel_map.h ----
// Copyright (C) 2009 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_EMPIRICAL_KERNEl_MAP_H_
#define DLIB_EMPIRICAL_KERNEl_MAP_H_

#include "../matrix.h"
#include "empirical_kernel_map_abstract.h"
#include "linearly_independent_subset_finder.h"
#include <vector>
#include "../algs.h"
#include "kernel_matrix.h"
#include "function.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Interprets vect as a point in the kernel feature space defined by
    // project_funct and returns a decision_function computing dot products
    // against it.  (Continues past this view's boundary.)
    template <typename kernel_type, typename EXP>
    const decision_function<kernel_type> convert_to_decision_function (
        const projection_function<kernel_type>& project_funct,
        const matrix_exp<EXP>& vect
    )
    {
        // make sure requires clause is not broken
        DLIB_ASSERT(project_funct.out_vector_size() > 0 && is_vector(vect) &&
                    project_funct.out_vector_size() == vect.size() && project_funct.weights.nc() == project_funct.basis_vectors.size(),
            "\t const decision_function convert_to_decision_function()"
            << "\n\t Invalid inputs to this function."
+ << "\n\t project_funct.out_vector_size(): " << project_funct.out_vector_size() + << "\n\t project_funct.weights.nc(): " << project_funct.weights.nc() + << "\n\t project_funct.basis_vectors.size(): " << project_funct.basis_vectors.size() + << "\n\t is_vector(vect): " << is_vector(vect) + << "\n\t vect.size(): " << vect.size() + ); + + return decision_function<kernel_type>(trans(project_funct.weights)*vect, + 0, + project_funct.kernel_function, + project_funct.basis_vectors); + } + +// ---------------------------------------------------------------------------------------- + + template <typename kern_type> + class empirical_kernel_map + { + public: + + struct empirical_kernel_map_error : public error + { + empirical_kernel_map_error(const std::string& message): error(message) {} + }; + + typedef kern_type kernel_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + void clear ( + ) + { + empirical_kernel_map().swap(*this); + } + + template <typename T> + void load( + const kernel_type& kernel_, + const T& basis_samples + ) + { + load_impl(kernel_, mat(basis_samples)); + } + + void load( + const linearly_independent_subset_finder<kernel_type>& lisf + ) + { + if (lisf.size() == 0) + { + std::ostringstream sout; + sout << "An empty linearly_independent_subset_finder was supplied to the\n" + << "empirical_kernel_map::load() function. 
One reason this might occur\n" + << "is if your dataset contains only zero vectors (or vectors \n" + << "approximately zero).\n"; + clear(); + throw empirical_kernel_map_error(sout.str()); + } + + kernel = lisf.get_kernel(); + weights = trans(chol(lisf.get_inv_kernel_marix())); + basis.resize(lisf.size()); + for (unsigned long i = 0; i < basis.size(); ++i) + basis[i] = lisf[i]; + + } + + const kernel_type get_kernel ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() > 0, + "\tconst kernel_type empirical_kernel_map::get_kernel()" + << "\n\t You have to load this object with a kernel before you can call this function" + << "\n\t this: " << this + ); + + return kernel; + } + + long out_vector_size ( + ) const + { + return weights.nr(); + } + + unsigned long basis_size ( + ) const + { + return basis.size(); + } + + const sample_type& operator[] ( + unsigned long idx + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT( idx < basis_size(), + "\t const sample_type& empirical_kernel_map::operator[](idx)" + << "\n\t Invalid inputs to this function." + << "\n\t basis_size(): " << basis_size() + << "\n\t this: " << this + ); + + return basis[idx]; + } + + template <typename EXP> + const decision_function<kernel_type> convert_to_decision_function ( + const matrix_exp<EXP>& vect + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0 && is_vector(vect) && out_vector_size() == vect.size(), + "\t const decision_function empirical_kernel_map::convert_to_decision_function()" + << "\n\t Invalid inputs to this function." 
+ << "\n\t out_vector_size(): " << out_vector_size() + << "\n\t is_vector(vect): " << is_vector(vect) + << "\n\t vect.size(): " << vect.size() + << "\n\t this: " << this + ); + + return decision_function<kernel_type>(trans(weights)*vect, 0, kernel, mat(basis)); + } + + template <typename EXP> + const distance_function<kernel_type> convert_to_distance_function ( + const matrix_exp<EXP>& vect + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0 && is_vector(vect) && out_vector_size() == vect.size(), + "\t const distance_function empirical_kernel_map::convert_to_distance_function()" + << "\n\t Invalid inputs to this function." + << "\n\t out_vector_size(): " << out_vector_size() + << "\n\t is_vector(vect): " << is_vector(vect) + << "\n\t vect.size(): " << vect.size() + << "\n\t this: " << this + ); + + return distance_function<kernel_type>(trans(weights)*vect, dot(vect,vect), kernel, mat(basis)); + } + + const projection_function<kernel_type> get_projection_function ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0, + "\tconst projection_function empirical_kernel_map::get_projection_function()" + << "\n\t You have to load this object with data before you can call this function" + << "\n\t this: " << this + ); + + return projection_function<kernel_type>(weights, kernel, mat(basis)); + } + + const matrix<scalar_type,0,0,mem_manager_type> get_transformation_to ( + const empirical_kernel_map& target + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0 && + target.out_vector_size() != 0 && + get_kernel() == target.get_kernel(), + "\t const matrix empirical_kernel_map::get_transformation_to(target)" + << "\n\t Invalid inputs were given to this function" + << "\n\t out_vector_size(): " << out_vector_size() + << "\n\t target.out_vector_size(): " << target.out_vector_size() + << "\n\t get_kernel()==target.get_kernel(): " << 
(get_kernel()==target.get_kernel()) + << "\n\t this: " << this + ); + + return target.weights * kernel_matrix(target.get_kernel(),target.basis, basis)*trans(weights); + } + + void get_transformation_to ( + const empirical_kernel_map& target, + matrix<scalar_type, 0, 0, mem_manager_type>& tmat, + projection_function<kernel_type>& partial_projection + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0 && + target.out_vector_size() != 0 && + get_kernel() == target.get_kernel() && + basis_size() < target.basis_size(), + "\t void empirical_kernel_map::get_transformation_to(target, tmat, partial_projection)" + << "\n\t Invalid inputs were given to this function" + << "\n\t out_vector_size(): " << out_vector_size() + << "\n\t target.out_vector_size(): " << target.out_vector_size() + << "\n\t basis_size(): " << basis_size() + << "\n\t target.basis_size(): " << target.basis_size() + << "\n\t get_kernel()==target.get_kernel(): " << (get_kernel()==target.get_kernel()) + << "\n\t this: " << this + ); + +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < basis_size(); ++i) + { + DLIB_ASSERT(dlib::equal((*this)[i], target[i]), + "\t const matrix empirical_kernel_map::get_transformation_to(target, tmat, partial_projection)" + << "\n\t target must contain a superset of the basis vectors in *this" + << "\n\t i: " << i + << "\n\t this: " << this + ); + } +#endif + + const unsigned long num1 = basis.size(); + const unsigned long num2 = target.basis.size(); + + tmat = colm(target.weights, range(0,num1-1))*kernel_matrix(kernel, basis)*trans(weights); + + empirical_kernel_map temp_ekm; + temp_ekm.load(kernel, rowm(mat(target.basis), range(num1,num2-1))); + + partial_projection = temp_ekm.get_projection_function(); + + partial_projection.weights = colm(target.weights,range(num1,num2-1))* + kernel_matrix(kernel, temp_ekm.basis)* + trans(temp_ekm.weights)* + partial_projection.weights; + } + + const matrix<scalar_type,0,1,mem_manager_type>& 
project ( + const sample_type& samp + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0, + "\tconst matrix empirical_kernel_map::project()" + << "\n\t You have to load this object with data before you can call this function" + << "\n\t this: " << this + ); + + temp1 = kernel_matrix(kernel, basis, samp); + temp2 = weights*temp1; + return temp2; + } + + const matrix<scalar_type,0,1,mem_manager_type>& project ( + const sample_type& samp, + scalar_type& projection_error + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(out_vector_size() != 0, + "\tconst matrix empirical_kernel_map::project()" + << "\n\t You have to load this object with data before you can call this function" + << "\n\t this: " << this + ); + + temp1 = kernel_matrix(kernel, basis, samp); + temp2 = weights*temp1; + // This value should never be negative (it measures squared distance) but I'm putting the abs() + // here just for good measure since rounding error might push it slightly negative. 
+ projection_error = std::abs( kernel(samp,samp) - dot(temp2,temp2)); + + return temp2; + } + + void swap ( + empirical_kernel_map& item + ) + { + basis.swap(item.basis); + weights.swap(item.weights); + std::swap(kernel, item.kernel); + + temp1.swap(item.temp1); + temp2.swap(item.temp2); + } + + friend void serialize ( + const empirical_kernel_map& item, + std::ostream& out + ) + { + serialize(item.basis, out); + serialize(item.weights, out); + serialize(item.kernel, out); + } + + friend void deserialize ( + empirical_kernel_map& item, + std::istream& in + ) + { + deserialize(item.basis, in); + deserialize(item.weights, in); + deserialize(item.kernel, in); + } + + private: + + template <typename T> + void load_impl( + const kernel_type& kernel_, + const T& basis_samples + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(basis_samples.size() > 0 && is_vector(basis_samples), + "\tvoid empirical_kernel_map::load(kernel,basis_samples)" + << "\n\t You have to give a non-empty set of basis_samples and it must be a vector" + << "\n\t basis_samples.size(): " << basis_samples.size() + << "\n\t is_vector(basis_samples): " << is_vector(basis_samples) + << "\n\t this: " << this + ); + + // clear out the weights before we begin. This way if an exception throws + // this object will already be in the right state. + weights.set_size(0,0); + kernel = kernel_; + basis.clear(); + basis.reserve(basis_samples.size()); + + // find out the value of the largest norm of the elements in basis_samples. 
+ const scalar_type max_norm = max(diag(kernel_matrix(kernel, basis_samples))); + // we will consider anything less than or equal to this number to be 0 + const scalar_type eps = max_norm*100*std::numeric_limits<scalar_type>::epsilon(); + + // Copy all the basis_samples into basis but make sure we don't copy any samples + // that have length 0 + for (long i = 0; i < basis_samples.size(); ++i) + { + const scalar_type norm = kernel(basis_samples(i), basis_samples(i)); + if (norm > eps) + { + basis.push_back(basis_samples(i)); + } + } + + if (basis.size() == 0) + { + clear(); + throw empirical_kernel_map_error("All basis_samples given to empirical_kernel_map::load() were zero vectors"); + } + + matrix<scalar_type,0,0,mem_manager_type> K(kernel_matrix(kernel, basis)), U,W,V; + + if (svd2(false,true,K,U,W,V)) + { + clear(); + throw empirical_kernel_map_error("While loading empirical_kernel_map with data, SVD failed to converge."); + } + + + // now count how many elements of W are non-zero + const long num_not_zero = static_cast<long>(sum(W>eps)); + + // Really, this should never happen. But I'm checking for good measure. + if (num_not_zero == 0) + { + clear(); + throw empirical_kernel_map_error("While loading empirical_kernel_map with data, SVD failed"); + } + + weights.set_size(num_not_zero, basis.size()); + + // now fill the weights matrix with the output of the SVD + long counter = 0; + for (long i =0; i < W.size(); ++i) + { + double val = W(i); + if (val > eps) + { + val = std::sqrt(val); + set_rowm(weights,counter) = rowm(trans(V),i)/val; + ++counter; + } + } + + } + + + std::vector<sample_type> basis; + matrix<scalar_type,0,0,mem_manager_type> weights; + kernel_type kernel; + + // These members don't contribute to the logical state of this object. They are + // just here so that they don't have to be reallocated every time the project() function + // is called. 
+ mutable matrix<scalar_type,0,1,mem_manager_type> temp1, temp2; + + }; + + template <typename kernel_type> + void swap ( + empirical_kernel_map<kernel_type>& a, + empirical_kernel_map<kernel_type>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_EMPIRICAL_KERNEl_MAP_H_ + diff --git a/ml/dlib/dlib/svm/empirical_kernel_map_abstract.h b/ml/dlib/dlib/svm/empirical_kernel_map_abstract.h new file mode 100644 index 000000000..8fc413447 --- /dev/null +++ b/ml/dlib/dlib/svm/empirical_kernel_map_abstract.h @@ -0,0 +1,430 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_ +#ifdef DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_ + +#include <vector> +#include "../matrix.h" +#include "kernel_abstract.h" +#include "function_abstract.h" +#include "linearly_independent_subset_finder_abstract.h" +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename EXP + > + const decision_function<kernel_type> convert_to_decision_function ( + const projection_function<kernel_type>& project_funct, + const matrix_exp<EXP>& vect + ); + /*! + requires + - is_vector(vect) == true + - vect.size() == project_funct.out_vector_size() + - project_funct.out_vector_size() > 0 + - project_funct.weights.nc() == project_funct.basis_vectors.size() + ensures + - This function interprets the given vector as a point in the kernel feature space defined + by the given projection function. The return value of this function is a decision + function, DF, that represents the given vector in the following sense: + - for all possible sample_type objects, S, it is the case that DF(S) == dot(project_funct(S), vect) + (i.e. 
the returned decision function computes dot products, in kernel feature space, + between vect and any argument you give it. Note also that this equality is exact, even + for sample_type objects not in the span of the basis_vectors.) + - DF.kernel_function == project_funct.kernel_function + - DF.b == 0 + - DF.basis_vectors == project_funct.basis_vectors. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename kern_type + > + class empirical_kernel_map + { + /*! + REQUIREMENTS ON kern_type + - must be a kernel function object as defined in dlib/svm/kernel_abstract.h + + INITIAL VALUE + - out_vector_size() == 0 + - basis_size() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents a map from objects of sample_type (the kind of object + a kernel function operates on) to finite dimensional column vectors which + represent points in the kernel feature space defined by whatever kernel + is used with this object. + + To use the empirical_kernel_map you supply it with a particular kernel and a set of + basis samples. After that you can present it with new samples and it will project + them into the part of kernel feature space spanned by your basis samples. + + This means the empirical_kernel_map is a tool you can use to very easily kernelize + any algorithm that operates on column vectors. All you have to do is select a + set of basis samples and then use the empirical_kernel_map to project all your + data points into the part of kernel feature space spanned by those basis samples. + Then just run your normal algorithm on the output vectors and it will be effectively + kernelized. + + Regarding methods to select a set of basis samples, if you are working with only a + few thousand samples then you can just use all of them as basis samples. + Alternatively, the linearly_independent_subset_finder often works well for + selecting a basis set. 
I also find that picking a random subset typically works + well. + + + The empirical kernel map is something that has been around in the kernel methods + literature for a long time but is seemingly not well known. Anyway, one of the + best books on the subject is the following: + Learning with Kernels: Support Vector Machines, Regularization, Optimization, + and Beyond by Bernhard Schlkopf, Alexander J. Smola + The authors discuss the empirical kernel map as well as many other interesting + topics. + !*/ + + public: + + typedef kern_type kernel_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + struct empirical_kernel_map_error : public error; + /*! + This is an exception class used to indicate a failure to create a + kernel map from data given by the user. + !*/ + + empirical_kernel_map ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void clear ( + ); + /*! + ensures + - this object has its initial value + !*/ + + template <typename T> + void load( + const kernel_type& kernel, + const T& basis_samples + ); + /*! + requires + - T must be a dlib::matrix type or something convertible to a matrix via mat() + (e.g. a std::vector) + - is_vector(basis_samples) == true + - basis_samples.size() > 0 + - kernel must be capable of operating on the elements of basis_samples. That is, + expressions such as kernel(basis_samples(0), basis_samples(0)) should make sense. + ensures + - 0 < #out_vector_size() <= basis_samples.size() + - #basis_size() == basis_samples.size() + - #get_kernel() == kernel + - This function constructs a map between normal sample_type objects and the + subspace of the kernel feature space defined by the given kernel and the + given set of basis samples. 
So after this function has been called you + will be able to project sample_type objects into kernel feature space + and obtain the resulting vector as a regular column matrix. + - The basis samples are loaded into this object in the order in which they + are stored in basis_samples. That is: + - for all valid i: (*this)[i] == basis_samples(i) + throws + - empirical_kernel_map_error + This exception is thrown if we are unable to create a kernel map. + If this happens then this object will revert back to its initial value. + !*/ + + void load( + const linearly_independent_subset_finder<kernel_type>& lisf + ); + /*! + ensures + - #out_vector_size() == lisf.dictionary_size() + - #basis_size() == lisf.dictionary_size() + - #get_kernel() == lisf.get_kernel() + - Uses the dictionary vectors from lisf as a basis set. Thus, this function + constructs a map between normal sample_type objects and the subspace of + the kernel feature space defined by the given kernel and the given set + of basis samples. So after this function has been called you will be + able to project sample_type objects into kernel feature space and obtain + the resulting vector as a regular column matrix. + - The basis samples are loaded into this object in the order in which they + are stored in lisf. That is: + - for all valid i: (*this)[i] == lisf[i] + throws + - empirical_kernel_map_error + This exception is thrown if we are unable to create a kernel map. + E.g. if the lisf.size() == 0. + If this happens then this object will revert back to its initial value. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + requires + - out_vector_size() != 0 + ensures + - returns a copy of the kernel used by this object + !*/ + + long out_vector_size ( + ) const; + /*! + ensures + - if (this object has been loaded with basis samples) then + - returns the dimensionality of the vectors output by the project() function. + - else + - returns 0 + !*/ + + unsigned long basis_size ( + ) const; + /*! 
+ ensures + - returns the number of basis vectors in projection_functions created + by this object. This is also equal to the number of basis vectors + given to the load() function. + !*/ + + const sample_type& operator[] ( + unsigned long idx + ) const; + /*! + requires + - idx < basis_size() + ensures + - returns a const reference to the idx'th basis vector contained inside + this object. + !*/ + + const matrix<scalar_type,0,1,mem_manager_type>& project ( + const sample_type& sample + ) const; + /*! + requires + - out_vector_size() != 0 + ensures + - takes the given sample and projects it into the kernel feature space + of out_vector_size() dimensions defined by this kernel map and + returns the resulting vector. + - in more precise terms, this function returns a vector such that: + - The returned vector will contain out_vector_size() elements. + - for any sample_type object S, the following equality is approximately true: + - get_kernel()(sample,S) == dot(project(sample), project(S)). + - The approximation error in the above equality will be zero (within rounding error) + if both sample_type objects involved are within the span of the set of basis + samples given to the load() function. If they are not then there will be some + approximation error. Note that all the basis samples are always within their + own span. So the equality is always exact for the samples given to the load() + function. + !*/ + + const matrix<scalar_type,0,1,mem_manager_type>& project ( + const sample_type& samp, + scalar_type& projection_error + ) const; + /*! + requires + - out_vector_size() != 0 + ensures + - This function returns project(samp) + (i.e. it returns the same thing as the above project() function) + - #projection_error == the square of the distance between the point samp + gets projected onto and samp's true image in kernel feature space. 
+ That is, this value is equal to: + pow(convert_to_distance_function(project(samp))(samp),2) + !*/ + + template <typename EXP> + const decision_function<kernel_type> convert_to_decision_function ( + const matrix_exp<EXP>& vect + ) const; + /*! + requires + - is_vector(vect) == true + - vect.size() == out_vector_size() + - out_vector_size() != 0 + ensures + - This function interprets the given vector as a point in the kernel feature space defined + by this empirical_kernel_map. The return value of this function is a decision + function, DF, that represents the given vector in the following sense: + - for all possible sample_type objects, S, it is the case that DF(S) == dot(project(S), vect) + (i.e. the returned decision function computes dot products, in kernel feature space, + between vect and any argument you give it. Note also that this equality is exact, even + for sample_type objects not in the span of the basis samples.) + - DF.kernel_function == get_kernel() + - DF.b == 0 + - DF.basis_vectors == these will be the basis samples given to the previous call to load(). Note + that it is possible for there to be fewer basis_vectors than basis samples given to load(). + - DF.basis_vectors.size() == basis_size() + !*/ + + template <typename EXP> + const distance_function<kernel_type> convert_to_distance_function ( + const matrix_exp<EXP>& vect + ) const + /*! + requires + - is_vector(vect) == true + - vect.size() == out_vector_size() + - out_vector_size() != 0 + ensures + - This function interprets the given vector as a point in the kernel feature space defined + by this empirical_kernel_map. The return value of this function is a distance + function, DF, that represents the given vector in the following sense: + - for any sample_type object S, the following equality is approximately true: + - DF(S) == length(project(S) - vect) + (i.e. the returned distance function computes distances, in kernel feature space, + between vect and any argument you give it. 
) + - The approximation error in the above equality will be zero (within rounding error) + if S is within the span of the set of basis samples given to the load() function. + If it is not then there will be some approximation error. Note that all the basis + samples are always within their own span. So the equality is always exact for the + samples given to the load() function. Note further that the distance computed + by DF(S) is always the correct distance in kernel feature space between vect and + the true projection of S. That is, the above equality is approximate only because + of potential error in the project() function, not in DF(S). + - DF.kernel_function == get_kernel() + - DF.b == dot(vect,vect) + - DF.basis_vectors == these will be the basis samples given to the previous call to load(). Note + that it is possible for there to be fewer basis_vectors than basis samples given to load(). + - DF.basis_vectors.size() == basis_size() + !*/ + + const projection_function<kernel_type> get_projection_function ( + ) const; + /*! + requires + - out_vector_size() != 0 + ensures + - returns a projection_function, PF, that computes the same projection as project(). + That is, calling PF() on any sample will produce the same output vector as calling + this->project() on that sample. + - PF.basis_vectors.size() == basis_size() + !*/ + + const matrix<scalar_type,0,0,mem_manager_type> get_transformation_to ( + const empirical_kernel_map& target + ) const; + /*! + requires + - get_kernel() == target.get_kernel() + - out_vector_size() != 0 + - target.out_vector_size() != 0 + ensures + - A point in the kernel feature space defined by the kernel get_kernel() typically + has different representations with respect to different empirical_kernel_maps. + This function lets you obtain a transformation matrix that will allow you + to project between these different representations. 
That is, this function returns + a matrix M with the following properties: + - M maps vectors represented according to *this into the representation used by target. + - M.nr() == target.out_vector_size() + - M.nc() == this->out_vector_size() + - Let V be a vector of this->out_vector_size() length. Then define two distance_functions + DF1 = this->convert_to_distance_function(V) + DF2 = target.convert_to_distance_function(M*V) + + Then DF1(DF2) == 0 // i.e. the distance between these two points should be 0 + + That is, DF1 and DF2 both represent the same point in kernel feature space. Note + that the above equality is only approximate. If the vector V represents a point in + kernel space that isn't in the span of the basis samples used by target then the + equality is approximate. However, if it is in their span then the equality will + be exact. For example, if target's basis samples are a superset of the basis samples + used by *this then the equality will always be exact (within rounding error). + !*/ + + void get_transformation_to ( + const empirical_kernel_map& target, + matrix<scalar_type, 0, 0, mem_manager_type>& tmat, + projection_function<kernel_type>& partial_projection + ) const; + /*! + requires + - get_kernel() == target.get_kernel() + - out_vector_size() != 0 + - target.out_vector_size() != 0 + - basis_size() < target.basis_size() + - for all i < basis_size(): (*this)[i] == target[i] + i.e. target must contain a superset of the basis vectors contained in *this. Moreover, + it must contain them in the same order. + ensures + - The single argument version of get_transformation_to() allows you to project + vectors from one empirical_kernel_map representation to another. This version + provides a somewhat different capability. Assuming target's basis vectors form a + superset of *this's basis vectors then this form of get_transformation_to() allows + you to reuse a vector from *this ekm to speed up the projection performed by target. 
+ The defining relation is given below. + - for any sample S: + - target.project(S) == #tmat * this->project(S) + #partial_projection(S) + (this is always true to within rounding error for any S) + - #partial_projection.basis_vectors.size() == target.basis_vectors.size() - this->basis_vectors.size() + - #tmat.nr() == target.out_vector_size() + - #tmat.nc() == this->out_vector_size() + !*/ + + void swap ( + empirical_kernel_map& item + ); + /*! + ensures + - swaps the state of *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type + > + void swap ( + empirical_kernel_map<kernel_type>& a, + empirical_kernel_map<kernel_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename kernel_type + > + void serialize ( + const empirical_kernel_map<kernel_type>& item, + std::ostream& out + ); + /*! + provides serialization support for empirical_kernel_map objects + !*/ + + template < + typename kernel_type + > + void deserialize ( + empirical_kernel_map<kernel_type>& item, + std::istream& in + ); + /*! + provides serialization support for empirical_kernel_map objects + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_EMPIRICAL_KERNEl_MAP_ABSTRACT_H_ + diff --git a/ml/dlib/dlib/svm/feature_ranking.h b/ml/dlib/dlib/svm/feature_ranking.h new file mode 100644 index 000000000..f6324fe3d --- /dev/null +++ b/ml/dlib/dlib/svm/feature_ranking.h @@ -0,0 +1,477 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_KERNEL_FEATURE_RANKINg_H_ +#define DLIB_KERNEL_FEATURE_RANKINg_H_ + +#include <vector> +#include <limits> + +#include "feature_ranking_abstract.h" +#include "kcentroid.h" +#include "../optimization.h" +#include "../statistics.h" +#include <iostream> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename sample_matrix_type, + typename label_matrix_type + > + matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl ( + const kcentroid<kernel_type>& kc, + const sample_matrix_type& samples, + const label_matrix_type& labels + ) + { + /* + This function ranks features by doing recursive feature elimination + + */ + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::mem_manager_type mm; + + + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true, + "\tmatrix rank_features()" + << "\n\t you have given invalid arguments to this function" + ); + + matrix<scalar_type,0,2,mm> results(samples(0).nr(), 2); + matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr()); + set_all_elements(mask,1); + + // figure out what the separation is between the two centroids when all the features are + // present. 
+ scalar_type first_separation; + { + kcentroid<kernel_type> c1(kc); + kcentroid<kernel_type> c2(kc); + // find the centers of each class + for (long s = 0; s < samples.size(); ++s) + { + if (labels(s) < 0) + { + c1.train(samples(s)); + } + else + { + c2.train(samples(s)); + } + + } + first_separation = c1(c2); + } + + + using namespace std; + + for (long i = results.nr()-1; i >= 0; --i) + { + long worst_feature_idx = 0; + scalar_type worst_feature_score = -std::numeric_limits<scalar_type>::infinity(); + + // figure out which feature to remove next + for (long j = 0; j < mask.size(); ++j) + { + // skip features we have already removed + if (mask(j) == 0) + continue; + + kcentroid<kernel_type> c1(kc); + kcentroid<kernel_type> c2(kc); + + // temporarily remove this feature from the working set of features + mask(j) = 0; + + // find the centers of each class + for (long s = 0; s < samples.size(); ++s) + { + if (labels(s) < 0) + { + c1.train(pointwise_multiply(samples(s),mask)); + } + else + { + c2.train(pointwise_multiply(samples(s),mask)); + } + + } + + // find the distance between the two centroids and use that + // as the score + const double score = c1(c2); + + if (score > worst_feature_score) + { + worst_feature_score = score; + worst_feature_idx = j; + } + + // add this feature back to the working set of features + mask(j) = 1; + + } + + // now that we know what the next worst feature is record it + mask(worst_feature_idx) = 0; + results(i,0) = worst_feature_idx; + results(i,1) = worst_feature_score; + } + + // now normalize the results + const scalar_type max_separation = std::max(max(colm(results,1)), first_separation); + set_colm(results,1) = colm(results,1)/max_separation; + for (long r = 0; r < results.nr()-1; ++r) + { + results(r,1) = results(r+1,1); + } + results(results.nr()-1,1) = first_separation/max_separation; + + return results; + } + +// ---------------------------------------------------------------------------------------- + + template < + 
typename kernel_type, + typename sample_matrix_type, + typename label_matrix_type + > + matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features ( + const kcentroid<kernel_type>& kc, + const sample_matrix_type& samples, + const label_matrix_type& labels + ) + { + return rank_features_impl(kc, mat(samples), mat(labels)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename sample_matrix_type, + typename label_matrix_type + > + matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features_impl ( + const kcentroid<kernel_type>& kc, + const sample_matrix_type& samples, + const label_matrix_type& labels, + const long num_features + ) + { + /* + This function ranks features by doing recursive feature addition + + */ + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::mem_manager_type mm; + + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(samples, labels) == true, + "\tmatrix rank_features()" + << "\n\t you have given invalid arguments to this function" + ); + DLIB_ASSERT(0 < num_features && num_features <= samples(0).nr(), + "\tmatrix rank_features()" + << "\n\t you have given invalid arguments to this function" + << "\n\t num_features: " << num_features + << "\n\t samples(0).nr(): " << samples(0).nr() + ); + + matrix<scalar_type,0,2,mm> results(num_features, 2); + matrix<scalar_type,sample_matrix_type::type::NR,1,mm> mask(samples(0).nr()); + set_all_elements(mask,0); + + using namespace std; + + for (long i = 0; i < results.nr(); ++i) + { + long best_feature_idx = 0; + scalar_type best_feature_score = -std::numeric_limits<scalar_type>::infinity(); + + // figure out which feature to add next + for (long j = 0; j < mask.size(); ++j) + { + // skip features we have already added + if (mask(j) == 1) + continue; + + 
kcentroid<kernel_type> c1(kc); + kcentroid<kernel_type> c2(kc); + + // temporarily add this feature to the working set of features + mask(j) = 1; + + // find the centers of each class + for (long s = 0; s < samples.size(); ++s) + { + if (labels(s) < 0) + { + c1.train(pointwise_multiply(samples(s),mask)); + } + else + { + c2.train(pointwise_multiply(samples(s),mask)); + } + + } + + // find the distance between the two centroids and use that + // as the score + const double score = c1(c2); + + if (score > best_feature_score) + { + best_feature_score = score; + best_feature_idx = j; + } + + // take this feature back out of the working set of features + mask(j) = 0; + + } + + // now that we know what the next best feature is record it + mask(best_feature_idx) = 1; + results(i,0) = best_feature_idx; + results(i,1) = best_feature_score; + } + + // now normalize the results + set_colm(results,1) = colm(results,1)/max(colm(results,1)); + + return results; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename sample_matrix_type, + typename label_matrix_type + > + matrix<typename kernel_type::scalar_type,0,2,typename kernel_type::mem_manager_type> rank_features ( + const kcentroid<kernel_type>& kc, + const sample_matrix_type& samples, + const label_matrix_type& labels, + const long num_features + ) + { + if (mat(samples).nr() > 0 && num_features == mat(samples)(0).nr()) + { + // if we are going to rank them all then might as well do the recursive feature elimination version + return rank_features_impl(kc, mat(samples), mat(labels)); + } + else + { + return rank_features_impl(kc, mat(samples), mat(labels), num_features); + } + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// 
---------------------------------------------------------------------------------------- + + namespace rank_features_helpers + { + template < + typename K, + typename sample_matrix_type, + typename label_matrix_type + > + typename K::scalar_type centroid_gap ( + const kcentroid<K>& kc, + const sample_matrix_type& samples, + const label_matrix_type& labels + ) + { + kcentroid<K> kc1(kc); + kcentroid<K> kc2(kc); + + // toss all the samples into our kcentroids + for (long i = 0; i < samples.size(); ++i) + { + if (labels(i) > 0) + kc1.train(samples(i)); + else + kc2.train(samples(i)); + } + + // now return the separation between the mean of these two centroids + return kc1(kc2); + } + + template < + typename sample_matrix_type, + typename label_matrix_type + > + class test + { + typedef typename sample_matrix_type::type sample_type; + typedef typename sample_type::type scalar_type; + typedef typename sample_type::mem_manager_type mem_manager_type; + + public: + test ( + const sample_matrix_type& samples_, + const label_matrix_type& labels_, + unsigned long num_sv_, + bool verbose_ + ) : samples(samples_), labels(labels_), num_sv(num_sv_), verbose(verbose_) + { + } + + double operator() ( + double gamma + ) const + { + using namespace std; + + // we are doing the optimization in log space so don't forget to convert back to normal space + gamma = std::exp(gamma); + + typedef radial_basis_kernel<sample_type> kernel_type; + // Make a kcentroid and find out what the gap is at the current gamma. Try to pick a reasonable + // tolerance. + const double tolerance = std::min(gamma*0.01, 0.01); + const kernel_type kern(gamma); + kcentroid<kernel_type> kc(kern, tolerance, num_sv); + scalar_type temp = centroid_gap(kc, samples, labels); + + if (verbose) + { + cout << "\rChecking goodness of gamma = " << gamma << ". 
Goodness = " + << temp << " " << flush; + } + return temp; + } + + const sample_matrix_type& samples; + const label_matrix_type& labels; + unsigned long num_sv; + bool verbose; + + }; + + template < + typename sample_matrix_type, + typename label_matrix_type + > + double find_gamma_with_big_centroid_gap_impl ( + const sample_matrix_type& samples, + const label_matrix_type& labels, + double initial_gamma, + unsigned long num_sv, + bool verbose + ) + { + using namespace std; + + if (verbose) + { + cout << endl; + } + + test<sample_matrix_type, label_matrix_type> funct(samples, labels, num_sv, verbose); + double best_gamma = std::log(initial_gamma); + double goodness = find_max_single_variable(funct, best_gamma, -15, 15, 1e-3, 100); + + if (verbose) + { + cout << "\rBest gamma = " << std::exp(best_gamma) << ". Goodness = " + << goodness << " " << endl; + } + + return std::exp(best_gamma); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_matrix_type, + typename label_matrix_type + > + double find_gamma_with_big_centroid_gap ( + const sample_matrix_type& samples, + const label_matrix_type& labels, + double initial_gamma = 0.1, + unsigned long num_sv = 40 + ) + { + DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels), + "\t double find_gamma_with_big_centroid_gap()" + << "\n\t initial_gamma: " << initial_gamma + << "\n\t num_sv: " << num_sv + << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels) + ); + + return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples), + mat(labels), + initial_gamma, + num_sv, + false); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_matrix_type, + typename label_matrix_type + > + double verbose_find_gamma_with_big_centroid_gap ( + const sample_matrix_type& samples, 
+ const label_matrix_type& labels, + double initial_gamma = 0.1, + unsigned long num_sv = 40 + ) + { + DLIB_ASSERT(initial_gamma > 0 && num_sv > 0 && is_binary_classification_problem(samples, labels), + "\t double verbose_find_gamma_with_big_centroid_gap()" + << "\n\t initial_gamma: " << initial_gamma + << "\n\t num_sv: " << num_sv + << "\n\t is_binary_classification_problem(): " << is_binary_classification_problem(samples, labels) + ); + + return rank_features_helpers::find_gamma_with_big_centroid_gap_impl(mat(samples), + mat(labels), + initial_gamma, + num_sv, + true); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type + > + double compute_mean_squared_distance ( + const vector_type& samples + ) + { + running_stats<double> rs; + for (unsigned long i = 0; i < samples.size(); ++i) + { + for (unsigned long j = i+1; j < samples.size(); ++j) + { + rs.add(length_squared(samples[i] - samples[j])); + } + } + + return rs.mean(); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KERNEL_FEATURE_RANKINg_H_ + + diff --git a/ml/dlib/dlib/svm/feature_ranking_abstract.h b/ml/dlib/dlib/svm/feature_ranking_abstract.h new file mode 100644 index 000000000..5a6fd3bb9 --- /dev/null +++ b/ml/dlib/dlib/svm/feature_ranking_abstract.h @@ -0,0 +1,136 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_ +#ifdef DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_ + +#include <vector> +#include <limits> + +#include "svm_abstract.h" +#include "kcentroid_abstract.h" +#include "../is_kind.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename sample_matrix_type, + typename label_matrix_type + > + matrix<typename kernel_type::scalar_type> rank_features ( + const kcentroid<kernel_type>& kc, + const sample_matrix_type& samples, + const label_matrix_type& labels, + const long num_features = samples(0).nr() + ); + /*! + requires + - sample_matrix_type == a matrix or something convertible to a matrix via mat() + - label_matrix_type == a matrix or something convertible to a matrix via mat() + - is_binary_classification_problem(samples, labels) == true + - kc.train(samples(0)) must be a valid expression. This means that + kc must use a kernel type that is capable of operating on the + contents of the samples matrix + - 0 < num_features <= samples(0).nr() + ensures + - Let Class1 denote the centroid of all the samples with labels that are < 0 + - Let Class2 denote the centroid of all the samples with labels that are > 0 + - finds a ranking of the features where the best features come first. This + function does this by computing the distance between the centroid of the Class1 + samples and the Class2 samples in kernel defined feature space. + Good features are then ones that result in the biggest separation between + the two centroids of Class1 and Class2. + - Uses the kc object to compute the centroids of the two classes + - returns a ranking matrix R where: + - R.nr() == num_features + - r.nc() == 2 + - R(i,0) == the index of the ith best feature according to our ranking. + (e.g. 
samples(n)(R(0,0)) is the best feature from sample(n) and + samples(n)(R(1,0)) is the second best, samples(n)(R(2,0)) the + third best and so on) + - R(i,1) == a number that indicates how much separation exists between + the two centroids when features 0 through i are used. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_matrix_type, + typename label_matrix_type + > + double find_gamma_with_big_centroid_gap ( + const sample_matrix_type& samples, + const label_matrix_type& labels, + double initial_gamma = 0.1, + unsigned long num_sv = 40 + ); + /*! + requires + - initial_gamma > 0 + - num_sv > 0 + - is_binary_classification_problem(samples, labels) == true + ensures + - This is a function that tries to pick a reasonable default value for the gamma + parameter of the radial_basis_kernel. It picks the parameter that gives the + largest separation between the centroids, in kernel feature space, of two classes + of data. It does this using the kcentroid object and it sets the kcentroid up + to use num_sv dictionary vectors. + - This function does a search for the best gamma and the search starts with + the value given by initial_gamma. Better initial guesses will give + better results since the routine may get stuck in a local minima. + - returns the value of gamma that results in the largest separation. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_matrix_type, + typename label_matrix_type + > + double verbose_find_gamma_with_big_centroid_gap ( + const sample_matrix_type& samples, + const label_matrix_type& labels, + double initial_gamma = 0.1, + unsigned long num_sv = 40 + ); + /*! 
+ requires + - initial_gamma > 0 + - num_sv > 0 + - is_binary_classification_problem(samples, labels) == true + ensures + - This function does the same exact thing as the above find_gamma_with_big_centroid_gap() + except that it is also verbose in the sense that it will print status messages to + standard out during its processing. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type + > + double compute_mean_squared_distance ( + const vector_type& samples + ); + /*! + requires + - vector_type is something with an interface compatible with std::vector. + Additionally, it must in turn contain dlib::matrix types which contain + scalars such as float or double values. + - for all valid i: is_vector(samples[i]) == true + ensures + - computes the average value of the squares of all the pairwise + distances between every element of samples. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KERNEL_FEATURE_RANKINg_ABSTRACT_H_ + + + diff --git a/ml/dlib/dlib/svm/function.h b/ml/dlib/dlib/svm/function.h new file mode 100644 index 000000000..f5a62a9f7 --- /dev/null +++ b/ml/dlib/dlib/svm/function.h @@ -0,0 +1,882 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SVm_FUNCTION +#define DLIB_SVm_FUNCTION + +#include "function_abstract.h" +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix.h" +#include "../algs.h" +#include "../serialize.h" +#include "../rand.h" +#include "../statistics.h" +#include "kernel_matrix.h" +#include "kernel.h" +#include "sparse_kernel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + struct decision_function + { + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::scalar_type result_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + + scalar_vector_type alpha; + scalar_type b; + K kernel_function; + sample_vector_type basis_vectors; + + decision_function ( + ) : b(0), kernel_function(K()) {} + + decision_function ( + const decision_function& d + ) : + alpha(d.alpha), + b(d.b), + kernel_function(d.kernel_function), + basis_vectors(d.basis_vectors) + {} + + decision_function ( + const scalar_vector_type& alpha_, + const scalar_type& b_, + const K& kernel_function_, + const sample_vector_type& basis_vectors_ + ) : + alpha(alpha_), + b(b_), + kernel_function(kernel_function_), + basis_vectors(basis_vectors_) + {} + + result_type operator() ( + const sample_type& x + ) const + { + result_type temp = 0; + for (long i = 0; i < alpha.nr(); ++i) + temp += alpha(i) * kernel_function(x,basis_vectors(i)); + + return temp - b; + } + }; + + template < + typename K + > + void serialize ( + const decision_function<K>& item, + std::ostream& out + ) + { + try + { + serialize(item.alpha, out); + serialize(item.b, out); + serialize(item.kernel_function, out); + serialize(item.basis_vectors, out); + } + catch (serialization_error& e) + { 
+ throw serialization_error(e.info + "\n while serializing object of type decision_function"); + } + } + + template < + typename K + > + void deserialize ( + decision_function<K>& item, + std::istream& in + ) + { + try + { + deserialize(item.alpha, in); + deserialize(item.b, in); + deserialize(item.kernel_function, in); + deserialize(item.basis_vectors, in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type decision_function"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename function_type + > + struct probabilistic_function + { + typedef typename function_type::scalar_type scalar_type; + typedef typename function_type::result_type result_type; + typedef typename function_type::sample_type sample_type; + typedef typename function_type::mem_manager_type mem_manager_type; + + scalar_type alpha; + scalar_type beta; + function_type decision_funct; + + probabilistic_function ( + ) : alpha(0), beta(0), decision_funct(function_type()) {} + + probabilistic_function ( + const probabilistic_function& d + ) : + alpha(d.alpha), + beta(d.beta), + decision_funct(d.decision_funct) + {} + + probabilistic_function ( + const scalar_type a_, + const scalar_type b_, + const function_type& decision_funct_ + ) : + alpha(a_), + beta(b_), + decision_funct(decision_funct_) + {} + + result_type operator() ( + const sample_type& x + ) const + { + result_type f = decision_funct(x); + return 1/(1 + std::exp(alpha*f + beta)); + } + }; + + template < + typename function_type + > + void serialize ( + const probabilistic_function<function_type>& item, + std::ostream& out + ) + { + try + { + serialize(item.alpha, out); + serialize(item.beta, out); + serialize(item.decision_funct, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type probabilistic_function"); + } + } + + template < + 
typename function_type + > + void deserialize ( + probabilistic_function<function_type>& item, + std::istream& in + ) + { + try + { + deserialize(item.alpha, in); + deserialize(item.beta, in); + deserialize(item.decision_funct, in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type probabilistic_function"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + struct probabilistic_decision_function + { + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::scalar_type result_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + scalar_type alpha; + scalar_type beta; + decision_function<K> decision_funct; + + probabilistic_decision_function ( + ) : alpha(0), beta(0), decision_funct(decision_function<K>()) {} + + probabilistic_decision_function ( + const probabilistic_function<decision_function<K> >& d + ) : + alpha(d.alpha), + beta(d.beta), + decision_funct(d.decision_funct) + {} + + probabilistic_decision_function ( + const probabilistic_decision_function& d + ) : + alpha(d.alpha), + beta(d.beta), + decision_funct(d.decision_funct) + {} + + probabilistic_decision_function ( + const scalar_type a_, + const scalar_type b_, + const decision_function<K>& decision_funct_ + ) : + alpha(a_), + beta(b_), + decision_funct(decision_funct_) + {} + + result_type operator() ( + const sample_type& x + ) const + { + result_type f = decision_funct(x); + return 1/(1 + std::exp(alpha*f + beta)); + } + }; + + template < + typename K + > + void serialize ( + const probabilistic_decision_function<K>& item, + std::ostream& out + ) + { + try + { + serialize(item.alpha, out); + serialize(item.beta, out); + serialize(item.decision_funct, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing 
object of type probabilistic_decision_function"); + } + } + + template < + typename K + > + void deserialize ( + probabilistic_decision_function<K>& item, + std::istream& in + ) + { + try + { + deserialize(item.alpha, in); + deserialize(item.beta, in); + deserialize(item.decision_funct, in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type probabilistic_decision_function"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class distance_function + { + public: + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::scalar_type result_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + + + distance_function ( + ) : b(0), kernel_function(K()) {} + + explicit distance_function ( + const kernel_type& kern + ) : b(0), kernel_function(kern) {} + + distance_function ( + const kernel_type& kern, + const sample_type& samp + ) : + alpha(ones_matrix<scalar_type>(1,1)), + b(kern(samp,samp)), + kernel_function(kern) + { + basis_vectors.set_size(1,1); + basis_vectors(0) = samp; + } + + distance_function ( + const decision_function<K>& f + ) : + alpha(f.alpha), + b(trans(f.alpha)*kernel_matrix(f.kernel_function,f.basis_vectors)*f.alpha), + kernel_function(f.kernel_function), + basis_vectors(f.basis_vectors) + { + // make sure requires clause is not broken + DLIB_ASSERT(f.alpha.size() == f.basis_vectors.size(), + "\t distance_function(f)" + << "\n\t The supplied decision_function is invalid." 
+ << "\n\t f.alpha.size(): " << f.alpha.size() + << "\n\t f.basis_vectors.size(): " << f.basis_vectors.size() + ); + } + + distance_function ( + const distance_function& d + ) : + alpha(d.alpha), + b(d.b), + kernel_function(d.kernel_function), + basis_vectors(d.basis_vectors) + { + } + + distance_function ( + const scalar_vector_type& alpha_, + const scalar_type& b_, + const K& kernel_function_, + const sample_vector_type& basis_vectors_ + ) : + alpha(alpha_), + b(b_), + kernel_function(kernel_function_), + basis_vectors(basis_vectors_) + { + // make sure requires clause is not broken + DLIB_ASSERT(alpha_.size() == basis_vectors_.size(), + "\t distance_function()" + << "\n\t The supplied arguments are invalid." + << "\n\t alpha_.size(): " << alpha_.size() + << "\n\t basis_vectors_.size(): " << basis_vectors_.size() + ); + } + + distance_function ( + const scalar_vector_type& alpha_, + const K& kernel_function_, + const sample_vector_type& basis_vectors_ + ) : + alpha(alpha_), + b(trans(alpha)*kernel_matrix(kernel_function_,basis_vectors_)*alpha), + kernel_function(kernel_function_), + basis_vectors(basis_vectors_) + { + // make sure requires clause is not broken + DLIB_ASSERT(alpha_.size() == basis_vectors_.size(), + "\t distance_function()" + << "\n\t The supplied arguments are invalid." 
+ << "\n\t alpha_.size(): " << alpha_.size() + << "\n\t basis_vectors_.size(): " << basis_vectors_.size() + ); + } + + const scalar_vector_type& get_alpha ( + ) const { return alpha; } + + const scalar_type& get_squared_norm ( + ) const { return b; } + + const K& get_kernel( + ) const { return kernel_function; } + + const sample_vector_type& get_basis_vectors ( + ) const { return basis_vectors; } + + result_type operator() ( + const sample_type& x + ) const + { + result_type temp = 0; + for (long i = 0; i < alpha.nr(); ++i) + temp += alpha(i) * kernel_function(x,basis_vectors(i)); + + temp = b + kernel_function(x,x) - 2*temp; + if (temp > 0) + return std::sqrt(temp); + else + return 0; + } + + result_type operator() ( + const distance_function& x + ) const + { + result_type temp = 0; + for (long i = 0; i < alpha.nr(); ++i) + for (long j = 0; j < x.alpha.nr(); ++j) + temp += alpha(i)*x.alpha(j) * kernel_function(basis_vectors(i), x.basis_vectors(j)); + + temp = b + x.b - 2*temp; + if (temp > 0) + return std::sqrt(temp); + else + return 0; + } + + distance_function operator* ( + const scalar_type& val + ) const + { + return distance_function(val*alpha, + val*val*b, + kernel_function, + basis_vectors); + } + + distance_function operator/ ( + const scalar_type& val + ) const + { + return distance_function(alpha/val, + b/val/val, + kernel_function, + basis_vectors); + } + + distance_function operator+ ( + const distance_function& rhs + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(get_kernel() == rhs.get_kernel(), + "\t distance_function distance_function::operator+()" + << "\n\t You can only add two distance_functions together if they use the same kernel." 
+ ); + + if (alpha.size() == 0) + return rhs; + else if (rhs.alpha.size() == 0) + return *this; + else + return distance_function(join_cols(alpha, rhs.alpha), + b + rhs.b + 2*trans(alpha)*kernel_matrix(kernel_function,basis_vectors,rhs.basis_vectors)*rhs.alpha, + kernel_function, + join_cols(basis_vectors, rhs.basis_vectors)); + } + + distance_function operator- ( + const distance_function& rhs + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(get_kernel() == rhs.get_kernel(), + "\t distance_function distance_function::operator-()" + << "\n\t You can only subtract two distance_functions if they use the same kernel." + ); + + if (alpha.size() == 0 && rhs.alpha.size() == 0) + return distance_function(kernel_function); + else if (alpha.size() != 0 && rhs.alpha.size() == 0) + return *this; + else if (alpha.size() == 0 && rhs.alpha.size() != 0) + return -1*rhs; + else + return distance_function(join_cols(alpha, -rhs.alpha), + b + rhs.b - 2*trans(alpha)*kernel_matrix(kernel_function,basis_vectors,rhs.basis_vectors)*rhs.alpha, + kernel_function, + join_cols(basis_vectors, rhs.basis_vectors)); + } + + private: + + scalar_vector_type alpha; + scalar_type b; + K kernel_function; + sample_vector_type basis_vectors; + + }; + + template < + typename K + > + distance_function<K> operator* ( + const typename K::scalar_type& val, + const distance_function<K>& df + ) { return df*val; } + + template < + typename K + > + void serialize ( + const distance_function<K>& item, + std::ostream& out + ) + { + try + { + serialize(item.alpha, out); + serialize(item.b, out); + serialize(item.kernel_function, out); + serialize(item.basis_vectors, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type distance_function"); + } + } + + template < + typename K + > + void deserialize ( + distance_function<K>& item, + std::istream& in + ) + { + try + { + deserialize(item.alpha, in); + deserialize(item.b, in); + 
deserialize(item.kernel_function, in); + deserialize(item.basis_vectors, in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type distance_function"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename function_type, + typename normalizer_type = vector_normalizer<typename function_type::sample_type> + > + struct normalized_function + { + typedef typename function_type::result_type result_type; + typedef typename function_type::sample_type sample_type; + typedef typename function_type::mem_manager_type mem_manager_type; + + normalizer_type normalizer; + function_type function; + + normalized_function ( + ){} + + normalized_function ( + const normalized_function& f + ) : + normalizer(f.normalizer), + function(f.function) + {} + + const std::vector<result_type> get_labels( + ) const { return function.get_labels(); } + + unsigned long number_of_classes ( + ) const { return function.number_of_classes(); } + + normalized_function ( + const vector_normalizer<sample_type>& normalizer_, + const function_type& funct + ) : normalizer(normalizer_), function(funct) {} + + result_type operator() ( + const sample_type& x + ) const { return function(normalizer(x)); } + }; + + template < + typename function_type, + typename normalizer_type + > + void serialize ( + const normalized_function<function_type,normalizer_type>& item, + std::ostream& out + ) + { + try + { + serialize(item.normalizer, out); + serialize(item.function, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type normalized_function"); + } + } + + template < + typename function_type, + typename normalizer_type + > + void deserialize ( + normalized_function<function_type,normalizer_type>& item, + std::istream& in + ) + { + try + { + deserialize(item.normalizer, in); + deserialize(item.function, in); + } + catch 
(serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type normalized_function"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + struct projection_function + { + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + typedef scalar_vector_type result_type; + + scalar_matrix_type weights; + K kernel_function; + sample_vector_type basis_vectors; + + projection_function ( + ) {} + + projection_function ( + const projection_function& f + ) : weights(f.weights), kernel_function(f.kernel_function), basis_vectors(f.basis_vectors) {} + + projection_function ( + const scalar_matrix_type& weights_, + const K& kernel_function_, + const sample_vector_type& basis_vectors_ + ) : weights(weights_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {} + + long out_vector_size ( + ) const { return weights.nr(); } + + const result_type& operator() ( + const sample_type& x + ) const + { + // Run the x sample through all the basis functions we have and then + // multiply it by the weights matrix and return the result. Note that + // the temp vectors are here to avoid reallocating their memory every + // time this function is called. 
+ temp1 = kernel_matrix(kernel_function, basis_vectors, x); + temp2 = weights*temp1; + return temp2; + } + + private: + mutable result_type temp1, temp2; + }; + + template < + typename K + > + void serialize ( + const projection_function<K>& item, + std::ostream& out + ) + { + try + { + serialize(item.weights, out); + serialize(item.kernel_function, out); + serialize(item.basis_vectors, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type projection_function"); + } + } + + template < + typename K + > + void deserialize ( + projection_function<K>& item, + std::istream& in + ) + { + try + { + deserialize(item.weights, in); + deserialize(item.kernel_function, in); + deserialize(item.basis_vectors, in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type projection_function"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename K, + typename result_type_ = typename K::scalar_type + > + struct multiclass_linear_decision_function + { + typedef result_type_ result_type; + + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; + + // You are getting a compiler error on this line because you supplied a non-linear kernel + // to the multiclass_linear_decision_function object. You have to use one of the linear + // kernels with this object. 
+ COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || + is_same_type<K, sparse_linear_kernel<sample_type> >::value )); + + + scalar_matrix_type weights; + scalar_vector_type b; + std::vector<result_type> labels; + + const std::vector<result_type>& get_labels( + ) const { return labels; } + + unsigned long number_of_classes ( + ) const { return labels.size(); } + + std::pair<result_type, scalar_type> predict ( + const sample_type& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(weights.size() > 0 && + weights.nr() == (long)number_of_classes() && + weights.nr() == b.size(), + "\t pair<result_type,scalar_type> multiclass_linear_decision_function::predict(x)" + << "\n\t This object must be properly initialized before you can use it." + << "\n\t weights.size(): " << weights.size() + << "\n\t weights.nr(): " << weights.nr() + << "\n\t number_of_classes(): " << number_of_classes() + ); + + // Rather than doing something like, best_idx = index_of_max(weights*x-b) + // we do the following somewhat more complex thing because this supports + // both sparse and dense samples. + scalar_type best_val = dot(rowm(weights,0),x) - b(0); + unsigned long best_idx = 0; + + for (unsigned long i = 1; i < labels.size(); ++i) + { + scalar_type temp = dot(rowm(weights,i),x) - b(i); + if (temp > best_val) + { + best_val = temp; + best_idx = i; + } + } + + return std::make_pair(labels[best_idx], best_val); + } + + result_type operator() ( + const sample_type& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(weights.size() > 0 && + weights.nr() == (long)number_of_classes() && + weights.nr() == b.size(), + "\t result_type multiclass_linear_decision_function::operator()(x)" + << "\n\t This object must be properly initialized before you can use it." 
+ << "\n\t weights.size(): " << weights.size() + << "\n\t weights.nr(): " << weights.nr() + << "\n\t number_of_classes(): " << number_of_classes() + ); + + return predict(x).first; + } + }; + + template < + typename K, + typename result_type_ + > + void serialize ( + const multiclass_linear_decision_function<K,result_type_>& item, + std::ostream& out + ) + { + try + { + serialize(item.weights, out); + serialize(item.b, out); + serialize(item.labels, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type multiclass_linear_decision_function"); + } + } + + template < + typename K, + typename result_type_ + > + void deserialize ( + multiclass_linear_decision_function<K,result_type_>& item, + std::istream& in + ) + { + try + { + deserialize(item.weights, in); + deserialize(item.b, in); + deserialize(item.labels, in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type multiclass_linear_decision_function"); + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_FUNCTION + + diff --git a/ml/dlib/dlib/svm/function_abstract.h b/ml/dlib/dlib/svm/function_abstract.h new file mode 100644 index 000000000..783a68c50 --- /dev/null +++ b/ml/dlib/dlib/svm/function_abstract.h @@ -0,0 +1,997 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_FUNCTION_ABSTRACT_ +#ifdef DLIB_SVm_FUNCTION_ABSTRACT_ + +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include "../statistics/statistics_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + struct decision_function + { + /*! 
+ REQUIREMENTS ON K + K must be a kernel function object type as defined at the + top of dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object represents a classification or regression function that was + learned by a kernel based learning algorithm. Therefore, it is a function + object that takes a sample object and returns a scalar value. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call operator() on this object from multiple threads so + long as the kernel, K, is also threadsafe. This is because operator() + is a read-only operation. However, any operation that modifies a + decision_function is not threadsafe. + !*/ + + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::scalar_type result_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + + scalar_vector_type alpha; + scalar_type b; + K kernel_function; + sample_vector_type basis_vectors; + + decision_function ( + ); + /*! + ensures + - #b == 0 + - #alpha.nr() == 0 + - #basis_vectors.nr() == 0 + !*/ + + decision_function ( + const decision_function& f + ); + /*! + ensures + - #*this is a copy of f + !*/ + + decision_function ( + const scalar_vector_type& alpha_, + const scalar_type& b_, + const K& kernel_function_, + const sample_vector_type& basis_vectors_ + ) : alpha(alpha_), b(b_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {} + /*! + ensures + - populates the decision function with the given basis vectors, weights(i.e. alphas), + b term, and kernel function. + !*/ + + result_type operator() ( + const sample_type& x + ) const + /*! 
+ ensures + - evaluates this sample according to the decision + function contained in this object. + !*/ + { + result_type temp = 0; + for (long i = 0; i < alpha.nr(); ++i) + temp += alpha(i) * kernel_function(x,basis_vectors(i)); + + return temp - b; + } + }; + + template < + typename K + > + void serialize ( + const decision_function<K>& item, + std::ostream& out + ); + /*! + provides serialization support for decision_function + !*/ + + template < + typename K + > + void deserialize ( + decision_function<K>& item, + std::istream& in + ); + /*! + provides serialization support for decision_function + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename function_type + > + struct probabilistic_function + { + /*! + REQUIREMENTS ON function_type + - function_type must be a function object with an overloaded + operator() similar to the other function objects defined in + this file. The operator() should return a scalar type such as + double or float. + + WHAT THIS OBJECT REPRESENTS + This object represents a binary decision function that returns an + estimate of the probability that a given sample is in the +1 class. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call operator() on this object from multiple threads so + long as decision_funct is also threadsafe. This is because operator() + is a read-only operation. However, any operation that modifies a + probabilistic_function is not threadsafe. 
+ !*/ + + typedef typename function_type::scalar_type scalar_type; + typedef typename function_type::result_type result_type; + typedef typename function_type::sample_type sample_type; + typedef typename function_type::mem_manager_type mem_manager_type; + + scalar_type alpha; + scalar_type beta; + function_type decision_funct; + + probabilistic_function ( + ); + /*! + ensures + - #alpha == 0 + - #beta == 0 + - #decision_funct has its initial value + !*/ + + probabilistic_function ( + const probabilistic_function& f + ); + /*! + ensures + - #*this is a copy of f + !*/ + + probabilistic_function ( + const scalar_type a, + const scalar_type b, + const function_type& decision_funct_ + ) : alpha(a), beta(b), decision_funct(decision_funct_) {} + /*! + ensures + - populates the probabilistic decision function with the given alpha, beta, + and decision function. + !*/ + + result_type operator() ( + const sample_type& x + ) const + /*! + ensures + - returns a number P such that: + - 0 <= P <= 1 + - P represents the probability that sample x is from + the class +1 + !*/ + { + // Evaluate the normal decision function + result_type f = decision_funct(x); + // Now basically normalize the output so that it is a properly + // conditioned probability of x being in the +1 class given + // the output of the decision function. + return 1/(1 + std::exp(alpha*f + beta)); + } + }; + + template < + typename function_type + > + void serialize ( + const probabilistic_function<function_type>& item, + std::ostream& out + ); + /*! + provides serialization support for probabilistic_function + !*/ + + template < + typename function_type + > + void deserialize ( + probabilistic_function<function_type>& item, + std::istream& in + ); + /*! + provides serialization support for probabilistic_function + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + struct probabilistic_decision_function + { + /*! 
+ REQUIREMENTS ON K + K must be a kernel function object type as defined at the + top of dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object represents a binary decision function that returns an + estimate of the probability that a given sample is in the +1 class. + + Note that this object is essentially just a copy of + probabilistic_function but with the template argument + changed from being a function type to a kernel type. Therefore, this + type is just a convenient version of probabilistic_function + for the case where the decision function is a dlib::decision_function<K>. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call operator() on this object from multiple threads so + long as the kernel, K, is also threadsafe. This is because operator() + is a read-only operation. However, any operation that modifies a + probabilistic_decision_function is not threadsafe. + !*/ + + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::scalar_type result_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + scalar_type alpha; + scalar_type beta; + decision_function<K> decision_funct; + + probabilistic_decision_function ( + ); + /*! + ensures + - #alpha == 0 + - #beta == 0 + - #decision_funct has its initial value + !*/ + + probabilistic_decision_function ( + const probabilistic_decision_function& f + ); + /*! + ensures + - #*this is a copy of f + !*/ + + probabilistic_decision_function ( + const probabilistic_function<decision_function<K> >& d + ); + /*! + ensures + - #*this is a copy of f + !*/ + + probabilistic_decision_function ( + const scalar_type a, + const scalar_type b, + const decision_function<K>& decision_funct_ + ) : alpha(a), beta(b), decision_funct(decision_funct_) {} + /*! 
+ ensures + - populates the probabilistic decision function with the given alpha, beta, + and decision_function. + !*/ + + result_type operator() ( + const sample_type& x + ) const + /*! + ensures + - returns a number P such that: + - 0 <= P <= 1 + - P represents the probability that sample x is from + the class +1 + !*/ + { + // Evaluate the normal decision function + result_type f = decision_funct(x); + // Now basically normalize the output so that it is a properly + // conditioned probability of x being in the +1 class given + // the output of the decision function. + return 1/(1 + std::exp(alpha*f + beta)); + } + }; + + template < + typename K + > + void serialize ( + const probabilistic_decision_function<K>& item, + std::ostream& out + ); + /*! + provides serialization support for probabilistic_decision_function + !*/ + + template < + typename K + > + void deserialize ( + probabilistic_decision_function<K>& item, + std::istream& in + ); + /*! + provides serialization support for probabilistic_decision_function + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class distance_function + { + /*! + REQUIREMENTS ON K + K must be a kernel function object type as defined at the + top of dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object represents a point in kernel induced feature space. + You may use this object to find the distance from the point it + represents to points in input space as well as other points + represented by distance_functions. + + Specifically, if O() is the feature mapping associated with + the kernel used by this object. Then this object represents + the point: + sum alpha(i)*O(basis_vectors(i)) + + I.e. It represents a linear combination of the basis vectors where + the weights of the linear combination are stored in the alpha vector. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. 
However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const members of this object from multiple + threads so long as the kernel, K, is also threadsafe. This is because + the const members are purely read-only operations. However, any + operation that modifies a distance_function is not threadsafe. + !*/ + + public: + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::scalar_type result_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + + distance_function ( + ); + /*! + ensures + - #get_squared_norm() == 0 + - #get_alpha().size() == 0 + - #get_basis_vectors().size() == 0 + - #get_kernel() == K() (i.e. the default value of the kernel) + !*/ + + explicit distance_function ( + const kernel_type& kern + ); + /*! + ensures + - #get_squared_norm() == 0 + - #get_alpha().size() == 0 + - #get_basis_vectors().size() == 0 + - #get_kernel() == kern + !*/ + + distance_function ( + const kernel_type& kern, + const sample_type& samp + ); + /*! + ensures + - This object represents the point in kernel feature space which + corresponds directly to the given sample. In particular this means + that: + - #get_kernel() == kern + - #get_alpha() == a vector of length 1 which contains the value 1 + - #get_basis_vectors() == a vector of length 1 which contains samp + !*/ + + distance_function ( + const decision_function<K>& f + ); + /*! + ensures + - Every decision_function represents a point in kernel feature space along + with a bias value. This constructor discards the bias value and creates + a distance_function which represents the point associated with the given + decision_function f. 
In particular, this means: + - #get_alpha() == f.alpha + - #get_kernel() == f.kernel_function + - #get_basis_vectors() == f.basis_vectors + !*/ + + distance_function ( + const distance_function& f + ); + /*! + requires + - f is a valid distance_function. In particular, this means that + f.alpha.size() == f.basis_vectors.size() + ensures + - #*this is a copy of f + !*/ + + distance_function ( + const scalar_vector_type& alpha, + const scalar_type& squared_norm, + const K& kernel_function, + const sample_vector_type& basis_vectors + ); + /*! + requires + - alpha.size() == basis_vectors.size() + - squared_norm == trans(alpha)*kernel_matrix(kernel_function,basis_vectors)*alpha + (Basically, squared_norm needs to be set properly for this object to make sense. + You should prefer to use the following constructor which computes squared_norm for + you. This version is provided just in case you already know squared_norm and + don't want to spend CPU cycles to recompute it.) + ensures + - populates the distance function with the given basis vectors, weights(i.e. alphas), + squared_norm value, and kernel function. I.e. + - #get_alpha() == alpha + - #get_squared_norm() == squared_norm + - #get_kernel() == kernel_function + - #get_basis_vectors() == basis_vectors + !*/ + + distance_function ( + const scalar_vector_type& alpha, + const K& kernel_function, + const sample_vector_type& basis_vectors + ); + /*! + requires + - alpha.size() == basis_vectors.size() + ensures + - populates the distance function with the given basis vectors, weights(i.e. alphas), + and kernel function. The correct b value is computed automatically. I.e. + - #get_alpha() == alpha + - #get_squared_norm() == trans(alpha)*kernel_matrix(kernel_function,basis_vectors)*alpha + (i.e. get_squared_norm() will be automatically set to the correct value) + - #get_kernel() == kernel_function + - #get_basis_vectors() == basis_vectors + !*/ + + const scalar_vector_type& get_alpha ( + ) const; + /*! 
+ ensures + - returns the set of weights on each basis vector in this object + !*/ + + const scalar_type& get_squared_norm ( + ) const; + /*! + ensures + - returns the squared norm of the point represented by this object. This value is + equal to the following expression: + trans(get_alpha()) * kernel_matrix(get_kernel(),get_basis_vectors()) * get_alpha() + !*/ + + const K& get_kernel( + ) const; + /*! + ensures + - returns the kernel used by this object. + !*/ + + const sample_vector_type& get_basis_vectors ( + ) const; + /*! + ensures + - returns the set of basis vectors contained in this object + !*/ + + result_type operator() ( + const sample_type& x + ) const; + /*! + ensures + - Let O(x) represent the point x projected into kernel induced feature space. + - let c == sum_over_i get_alpha()(i)*O(get_basis_vectors()(i)) == the point in kernel space that + this object represents. That is, c is the weighted sum of basis vectors. + - Then this object returns the distance between the point O(x) and c in kernel + space. + !*/ + + result_type operator() ( + const distance_function& x + ) const; + /*! + requires + - kernel_function == x.kernel_function + ensures + - returns the distance between the points in kernel space represented by *this and x. + !*/ + + distance_function operator* ( + const scalar_type& val + ) const; + /*! + ensures + - multiplies the point represented by *this by val and returns the result. In + particular, this function returns a decision_function DF such that: + - DF.get_basis_vectors() == get_basis_vectors() + - DF.get_kernel() == get_kernel() + - DF.get_alpha() == get_alpha() * val + !*/ + + distance_function operator/ ( + const scalar_type& val + ) const; + /*! + ensures + - divides the point represented by *this by val and returns the result. 
In + particular, this function returns a decision_function DF such that: + - DF.get_basis_vectors() == get_basis_vectors() + - DF.get_kernel() == get_kernel() + - DF.get_alpha() == get_alpha() / val + !*/ + + distance_function operator+ ( + const distance_function& rhs + ) const; + /*! + requires + - get_kernel() == rhs.get_kernel() + ensures + - returns a distance function DF such that: + - DF represents the sum of the point represented by *this and rhs + - DF.get_basis_vectors().size() == get_basis_vectors().size() + rhs.get_basis_vectors().size() + - DF.get_basis_vectors() contains all the basis vectors in both *this and rhs. + - DF.get_kernel() == get_kernel() + - DF.alpha == join_cols(get_alpha(), rhs.get_alpha()) + !*/ + + distance_function operator- ( + const distance_function& rhs + ) const; + /*! + requires + - get_kernel() == rhs.get_kernel() + ensures + - returns a distance function DF such that: + - DF represents the difference of the point represented by *this and rhs (i.e. *this - rhs) + - DF.get_basis_vectors().size() == get_basis_vectors().size() + rhs.get_basis_vectors().size() + - DF.get_basis_vectors() contains all the basis vectors in both *this and rhs. + - DF.get_kernel() == get_kernel() + - DF.alpha == join_cols(get_alpha(), -1 * rhs.get_alpha()) + !*/ + }; + + template < + typename K + > + distance_function<K> operator* ( + const typename K::scalar_type& val, + const distance_function<K>& df + ) { return df*val; } + /*! + ensures + - multiplies the point represented by *this by val and returns the result. This + function just allows multiplication syntax of the form val*df. + !*/ + + template < + typename K + > + void serialize ( + const distance_function<K>& item, + std::ostream& out + ); + /*! + provides serialization support for distance_function + !*/ + + template < + typename K + > + void deserialize ( + distance_function<K>& item, + std::istream& in + ); + /*! 
+ provides serialization support for distance_function + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename function_type, + typename normalizer_type = vector_normalizer<typename function_type::sample_type> + > + struct normalized_function + { + /*! + REQUIREMENTS ON function_type + - function_type must be a function object with an overloaded + operator() similar to the other function objects defined in + this file. + + REQUIREMENTS ON normalizer_type + - normalizer_type must be a function object with an overloaded + operator() that takes a sample_type and returns a sample_type. + + WHAT THIS OBJECT REPRESENTS + This object represents a container for another function + object and an instance of a normalizer function. + + It automatically normalizes all inputs before passing them + off to the contained function object. + !*/ + + typedef typename function_type::result_type result_type; + typedef typename function_type::sample_type sample_type; + typedef typename function_type::mem_manager_type mem_manager_type; + + normalizer_type normalizer; + function_type function; + + normalized_function ( + ); + /*! + ensures + - the members of this object have their default values + !*/ + + normalized_function ( + const normalized_function& f + ); + /*! + ensures + - #*this is a copy of f + !*/ + + normalized_function ( + const vector_normalizer<sample_type>& normalizer_, + const function_type& funct + ) : normalizer(normalizer_), function(funct) {} + /*! + ensures + - populates this object with the vector_normalizer and function object + !*/ + + const std::vector<result_type> get_labels( + ) const; + /*! + ensures + - returns function.get_labels() + !*/ + + unsigned long number_of_classes ( + ) const; + /*! + ensures + - returns function.number_of_classes() + !*/ + + result_type operator() ( + const sample_type& x + ) const + /*! 
+ ensures + - returns function(normalizer(x)) + !*/ + }; + + template < + typename function_type, + typename normalizer_type + > + void serialize ( + const normalized_function<function_type, normalizer_type>& item, + std::ostream& out + ); + /*! + provides serialization support for normalized_function + !*/ + + template < + typename function_type, + typename normalizer_type + > + void deserialize ( + normalized_function<function_type, normalizer_type>& item, + std::istream& in + ); + /*! + provides serialization support for normalized_function + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + struct projection_function + { + /*! + REQUIREMENTS ON K + K must be a kernel function object type as defined at the + top of dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object represents a function that takes a data sample and projects + it into kernel feature space. The result is a real valued column vector that + represents a point in a kernel feature space. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + Instances of this object have a mutable cache which is used by const + member functions. Therefore, it is not safe to use one instance of + this object from multiple threads (unless protected by a mutex). 
+ !*/ + + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + typedef scalar_vector_type result_type; + + scalar_matrix_type weights; + K kernel_function; + sample_vector_type basis_vectors; + + projection_function ( + ); + /*! + ensures + - #weights.size() == 0 + - #basis_vectors.size() == 0 + !*/ + + projection_function ( + const projection_function& f + ); + /*! + ensures + - #*this is a copy of f + !*/ + + projection_function ( + const scalar_matrix_type& weights_, + const K& kernel_function_, + const sample_vector_type& basis_vectors_ + ) : weights(weights_), kernel_function(kernel_function_), basis_vectors(basis_vectors_) {} + /*! + ensures + - populates the projection function with the given basis vectors, weights, + and kernel function. + !*/ + + long out_vector_size ( + ) const; + /*! + ensures + - returns weights.nr() + (i.e. returns the dimensionality of the vectors output by this projection_function.) + !*/ + + const result_type& operator() ( + const sample_type& x + ) const + /*! + requires + - weights.nc() == basis_vectors.size() + - out_vector_size() > 0 + ensures + - Takes the given x sample and projects it onto part of the kernel feature + space spanned by the basis_vectors. The exact projection arithmetic is + defined below. + !*/ + { + // Run the x sample through all the basis functions we have and then + // multiply it by the weights matrix and return the result. Note that + // the temp vectors are here to avoid reallocating their memory every + // time this function is called. 
+ temp1 = kernel_matrix(kernel_function, basis_vectors, x); + temp2 = weights*temp1; + return temp2; + } + + private: + mutable result_type temp1, temp2; + }; + + template < + typename K + > + void serialize ( + const projection_function<K>& item, + std::ostream& out + ); + /*! + provides serialization support for projection_function + !*/ + + template < + typename K + > + void deserialize ( + projection_function<K>& item, + std::istream& in + ); + /*! + provides serialization support for projection_function + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename K, + typename result_type_ = typename K::scalar_type + > + struct multiclass_linear_decision_function + { + /*! + REQUIREMENTS ON K + K must be either linear_kernel or sparse_linear_kernel. + + WHAT THIS OBJECT REPRESENTS + This object represents a multiclass classifier built out of a set of + binary classifiers. Each binary classifier is used to vote for the + correct multiclass label using a one vs. all strategy. Therefore, + if you have N classes then there will be N binary classifiers inside + this object. Additionally, this object is linear in the sense that + each of these binary classifiers is a simple linear plane. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const member functions of this object from + multiple threads. This is because the const members are purely + read-only operations. However, any operation that modifies a + multiclass_linear_decision_function is not threadsafe. 
+ !*/ + + typedef result_type_ result_type; + + typedef K kernel_type; + typedef typename K::scalar_type scalar_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; + + scalar_matrix_type weights; + scalar_vector_type b; + std::vector<result_type> labels; + + const std::vector<result_type>& get_labels( + ) const { return labels; } + /*! + ensures + - returns a vector containing all the labels which can be + predicted by this object. + !*/ + + unsigned long number_of_classes ( + ) const; + /*! + ensures + - returns get_labels().size() + (i.e. returns the number of different labels/classes predicted by + this object) + !*/ + + std::pair<result_type, scalar_type> predict ( + const sample_type& x + ) const; + /*! + requires + - weights.size() > 0 + - weights.nr() == number_of_classes() == b.size() + - if (x is a dense vector, i.e. a dlib::matrix) then + - is_vector(x) == true + - x.size() == weights.nc() + (i.e. it must be legal to multiply weights with x) + ensures + - Returns the predicted label for the x sample and also it's score. + In particular, it returns the following: + std::make_pair(labels[index_of_max(weights*x-b)], max(weights*x-b)) + !*/ + + result_type operator() ( + const sample_type& x + ) const; + /*! + requires + - weights.size() > 0 + - weights.nr() == number_of_classes() == b.size() + - if (x is a dense vector, i.e. a dlib::matrix) then + - is_vector(x) == true + - x.size() == weights.nc() + (i.e. it must be legal to multiply weights with x) + ensures + - Returns the predicted label for the x sample. 
In particular, it returns + the following: + labels[index_of_max(weights*x-b)] + Or in other words, this function returns predict(x).first + !*/ + }; + + template < + typename K, + typename result_type_ + > + void serialize ( + const multiclass_linear_decision_function<K,result_type_>& item, + std::ostream& out + ); + /*! + provides serialization support for multiclass_linear_decision_function + !*/ + + template < + typename K, + typename result_type_ + > + void deserialize ( + multiclass_linear_decision_function<K,result_type_>& item, + std::istream& in + ); + /*! + provides serialization support for multiclass_linear_decision_function + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_FUNCTION_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/kcentroid.h b/ml/dlib/dlib/svm/kcentroid.h new file mode 100644 index 000000000..5f380486a --- /dev/null +++ b/ml/dlib/dlib/svm/kcentroid.h @@ -0,0 +1,614 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_KCENTROId_ +#define DLIB_KCENTROId_ + +#include <vector> + +#include "kcentroid_abstract.h" +#include "../matrix.h" +#include "function.h" +#include "../std_allocator.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + class kcentroid + { + /*! + This object represents a weighted sum of sample points in a kernel induced + feature space. It can be used to kernelize any algorithm that requires only + the ability to perform vector addition, subtraction, scalar multiplication, + and inner products. It uses the sparsification technique described in the + paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel. 
+ + To understand the code it would also be useful to consult page 114 of the book + Kernel Methods for Pattern Analysis by Taylor and Cristianini as well as page 554 + (particularly equation 18.31) of the book Learning with Kernels by Scholkopf and + Smola. Everything you really need to know is in the Engel paper. But the other + books help give more perspective on the issues involved. + + + INITIAL VALUE + - min_strength == 0 + - min_vect_idx == 0 + - K_inv.size() == 0 + - K.size() == 0 + - dictionary.size() == 0 + - bias == 0 + - bias_is_stale == false + + CONVENTION + - max_dictionary_size() == my_max_dictionary_size + - get_kernel() == kernel + + - K.nr() == dictionary.size() + - K.nc() == dictionary.size() + - for all valid r,c: + - K(r,c) == kernel(dictionary[r], dictionary[c]) + - K_inv == inv(K) + + - if (dictionary.size() == my_max_dictionary_size && my_remove_oldest_first == false) then + - for all valid 0 < i < dictionary.size(): + - Let STRENGTHS[i] == the delta you would get for dictionary[i] (i.e. Approximately + Linearly Dependent value) if you removed dictionary[i] from this object and then + tried to add it back in. 
+ - min_strength == the minimum value from STRENGTHS + - min_vect_idx == the index of the element in STRENGTHS with the smallest value + + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + kcentroid ( + ) : + my_remove_oldest_first(false), + my_tolerance(0.001), + my_max_dictionary_size(1000000), + bias(0), + bias_is_stale(false) + { + clear_dictionary(); + } + + explicit kcentroid ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000, + bool remove_oldest_first_ = false + ) : + my_remove_oldest_first(remove_oldest_first_), + kernel(kernel_), + my_tolerance(tolerance_), + my_max_dictionary_size(max_dictionary_size_), + bias(0), + bias_is_stale(false) + { + // make sure requires clause is not broken + DLIB_ASSERT(tolerance_ > 0 && max_dictionary_size_ > 1, + "\tkcentroid::kcentroid()" + << "\n\t You have to give a positive tolerance" + << "\n\t this: " << this + << "\n\t tolerance_: " << tolerance_ + << "\n\t max_dictionary_size_: " << max_dictionary_size_ + ); + + clear_dictionary(); + } + + scalar_type tolerance() const + { + return my_tolerance; + } + + unsigned long max_dictionary_size() const + { + return my_max_dictionary_size; + } + + bool remove_oldest_first ( + ) const + { + return my_remove_oldest_first; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel; + } + + void clear_dictionary () + { + dictionary.clear(); + alpha.clear(); + + min_strength = 0; + min_vect_idx = 0; + K_inv.set_size(0,0); + K.set_size(0,0); + samples_seen = 0; + bias = 0; + bias_is_stale = false; + } + + scalar_type operator() ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::operator()(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid 
objects if they use the same kernel" + << "\n\tthis: " << this + ); + + // make sure the bias terms are up to date + refresh_bias(); + x.refresh_bias(); + + scalar_type temp = x.bias + bias - 2*inner_product(x); + + if (temp > 0) + return std::sqrt(temp); + else + return 0; + } + + scalar_type inner_product ( + const sample_type& x + ) const + { + scalar_type temp = 0; + for (unsigned long i = 0; i < alpha.size(); ++i) + temp += alpha[i]*kernel(dictionary[i], x); + return temp; + } + + scalar_type inner_product ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::inner_product(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + scalar_type temp = 0; + for (unsigned long i = 0; i < alpha.size(); ++i) + { + for (unsigned long j = 0; j < x.alpha.size(); ++j) + { + temp += alpha[i]*x.alpha[j]*kernel(dictionary[i], x.dictionary[j]); + } + } + return temp; + } + + scalar_type squared_norm ( + ) const + { + refresh_bias(); + return bias; + } + + scalar_type operator() ( + const sample_type& x + ) const + { + // make sure the bias terms are up to date + refresh_bias(); + + const scalar_type kxx = kernel(x,x); + + scalar_type temp = kxx + bias - 2*inner_product(x); + if (temp > 0) + return std::sqrt(temp); + else + return 0; + } + + scalar_type samples_trained ( + ) const + { + return samples_seen; + } + + scalar_type test_and_train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + return train_and_maybe_test(x,cscale,xscale,true); + } + + void train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + train_and_maybe_test(x,cscale,xscale,false); + } + + scalar_type test_and_train ( + const sample_type& x, + scalar_type 
cscale, + scalar_type xscale + ) + { + ++samples_seen; + return train_and_maybe_test(x,cscale,xscale,true); + } + + void scale_by ( + scalar_type cscale + ) + { + for (unsigned long i = 0; i < alpha.size(); ++i) + { + alpha[i] = cscale*alpha[i]; + } + } + + void train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + train_and_maybe_test(x,cscale,xscale,false); + } + + void swap ( + kcentroid& item + ) + { + exchange(min_strength, item.min_strength); + exchange(min_vect_idx, item.min_vect_idx); + exchange(my_remove_oldest_first, item.my_remove_oldest_first); + + exchange(kernel, item.kernel); + dictionary.swap(item.dictionary); + alpha.swap(item.alpha); + K_inv.swap(item.K_inv); + K.swap(item.K); + exchange(my_tolerance, item.my_tolerance); + exchange(samples_seen, item.samples_seen); + exchange(bias, item.bias); + a.swap(item.a); + k.swap(item.k); + exchange(bias_is_stale, item.bias_is_stale); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + } + + unsigned long dictionary_size ( + ) const { return dictionary.size(); } + + friend void serialize(const kcentroid& item, std::ostream& out) + { + serialize(item.min_strength, out); + serialize(item.min_vect_idx, out); + serialize(item.my_remove_oldest_first, out); + + serialize(item.kernel, out); + serialize(item.dictionary, out); + serialize(item.alpha, out); + serialize(item.K_inv, out); + serialize(item.K, out); + serialize(item.my_tolerance, out); + serialize(item.samples_seen, out); + serialize(item.bias, out); + serialize(item.bias_is_stale, out); + serialize(item.my_max_dictionary_size, out); + } + + friend void deserialize(kcentroid& item, std::istream& in) + { + deserialize(item.min_strength, in); + deserialize(item.min_vect_idx, in); + deserialize(item.my_remove_oldest_first, in); + + deserialize(item.kernel, in); + deserialize(item.dictionary, in); + deserialize(item.alpha, in); + deserialize(item.K_inv, in); + deserialize(item.K, in); + 
deserialize(item.my_tolerance, in); + deserialize(item.samples_seen, in); + deserialize(item.bias, in); + deserialize(item.bias_is_stale, in); + deserialize(item.my_max_dictionary_size, in); + } + + distance_function<kernel_type> get_distance_function ( + ) const + { + refresh_bias(); + return distance_function<kernel_type>(mat(alpha), + bias, + kernel, + mat(dictionary)); + } + + private: + + void refresh_bias ( + ) const + { + if (bias_is_stale) + { + bias_is_stale = false; + // recompute the bias term + bias = sum(pointwise_multiply(K, mat(alpha)*trans(mat(alpha)))); + } + } + + scalar_type train_and_maybe_test ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale, + bool do_test + ) + { + scalar_type test_result = 0; + const scalar_type kx = kernel(x,x); + if (alpha.size() == 0) + { + // just ignore this sample if it is the zero vector (or really close to being zero) + if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) + { + // set initial state since this is the first training example we have seen + + K_inv.set_size(1,1); + K_inv(0,0) = 1/kx; + K.set_size(1,1); + K(0,0) = kx; + + alpha.push_back(xscale); + dictionary.push_back(x); + } + else + { + // the distance from an empty kcentroid and the zero vector is zero by definition. + return 0; + } + } + else + { + // fill in k + k.set_size(alpha.size()); + for (long r = 0; r < k.nr(); ++r) + k(r) = kernel(x,dictionary[r]); + + if (do_test) + { + refresh_bias(); + test_result = std::sqrt(kx + bias - 2*trans(mat(alpha))*k); + } + + // compute the error we would have if we approximated the new x sample + // with the dictionary. That is, do the ALD test from the KRLS paper. + a = K_inv*k; + scalar_type delta = kx - trans(k)*a; + + // if this new vector isn't approximately linearly dependent on the vectors + // in our dictionary. 
+ if (delta > min_strength && delta > my_tolerance) + { + bool need_to_update_min_strength = false; + if (dictionary.size() >= my_max_dictionary_size) + { + // We need to remove one of the old members of the dictionary before + // we proceed with adding a new one. + long idx_to_remove; + if (my_remove_oldest_first) + { + // remove the oldest one + idx_to_remove = 0; + } + else + { + // if we have never computed the min_strength then we should compute it + if (min_strength == 0) + recompute_min_strength(); + + // select the dictionary vector that is most linearly dependent for removal + idx_to_remove = min_vect_idx; + need_to_update_min_strength = true; + } + + remove_dictionary_vector(idx_to_remove); + + // recompute these guys since they were computed with the old + // kernel matrix + k = remove_row(k,idx_to_remove); + a = K_inv*k; + delta = kx - trans(k)*a; + } + + // add x to the dictionary + dictionary.push_back(x); + + + // update K_inv by computing the new one in the temp matrix (equation 3.14) + matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1); + // update the middle part of the matrix + set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; + // update the right column of the matrix + set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; + // update the bottom row of the matrix + set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); + // update the bottom right corner of the matrix + temp(K_inv.nr(), K_inv.nc()) = 1/delta; + // put temp into K_inv + temp.swap(K_inv); + + + + // update K (the kernel matrix) + temp.set_size(K.nr()+1, K.nc()+1); + set_subm(temp, get_rect(K)) = K; + // update the right column of the matrix + set_subm(temp, 0, K.nr(),K.nr(),1) = k; + // update the bottom row of the matrix + set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); + temp(K.nr(), K.nc()) = kx; + // put temp into K + temp.swap(K); + + + // now update the alpha vector + for (unsigned long i = 0; i < alpha.size(); ++i) + { + alpha[i] 
*= cscale; + } + alpha.push_back(xscale); + + + if (need_to_update_min_strength) + { + // now we have to recompute the min_strength in this case + recompute_min_strength(); + } + } + else + { + // update the alpha vector so that this new sample has been added into + // the mean vector we are accumulating + for (unsigned long i = 0; i < alpha.size(); ++i) + { + alpha[i] = cscale*alpha[i] + xscale*a(i); + } + } + } + + bias_is_stale = true; + + return test_result; + } + + void remove_dictionary_vector ( + long i + ) + /*! + requires + - 0 <= i < dictionary.size() + ensures + - #dictionary.size() == dictionary.size() - 1 + - #alpha.size() == alpha.size() - 1 + - updates the K_inv matrix so that it is still a proper inverse of the + kernel matrix + - also removes the necessary row and column from the K matrix + - uses the this->a variable so after this function runs that variable + will contain a different value. + !*/ + { + // remove the dictionary vector + dictionary.erase(dictionary.begin()+i); + + // remove the i'th vector from the inverse kernel matrix. This formula is basically + // just the reverse of the way K_inv is updated by equation 3.14 during normal training. + K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); + + // now compute the updated alpha values to take account that we just removed one of + // our dictionary vectors + a = (K_inv*remove_row(K,i)*mat(alpha)); + + // now copy over the new alpha values + alpha.resize(alpha.size()-1); + for (unsigned long k = 0; k < alpha.size(); ++k) + { + alpha[k] = a(k); + } + + // update the K matrix as well + K = removerc(K,i,i); + } + + void recompute_min_strength ( + ) + /*! + ensures + - recomputes the min_strength and min_vect_idx values + so that they are correct with respect to the CONVENTION + - uses the this->a variable so after this function runs that variable + will contain a different value. 
+ !*/ + { + min_strength = std::numeric_limits<scalar_type>::max(); + + // here we loop over each dictionary vector and compute what its delta would be if + // we were to remove it from the dictionary and then try to add it back in. + for (unsigned long i = 0; i < dictionary.size(); ++i) + { + // compute a = K_inv*k but where dictionary vector i has been removed + a = (removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i)) * + (remove_row(colm(K,i),i)); + scalar_type delta = K(i,i) - trans(remove_row(colm(K,i),i))*a; + + if (delta < min_strength) + { + min_strength = delta; + min_vect_idx = i; + } + } + } + + + + typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; + typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; + typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; + typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type; + + + scalar_type min_strength; + unsigned long min_vect_idx; + bool my_remove_oldest_first; + + kernel_type kernel; + dictionary_vector_type dictionary; + alpha_vector_type alpha; + + matrix<scalar_type,0,0,mem_manager_type> K_inv; + matrix<scalar_type,0,0,mem_manager_type> K; + + scalar_type my_tolerance; + unsigned long my_max_dictionary_size; + scalar_type samples_seen; + mutable scalar_type bias; + mutable bool bias_is_stale; + + + // temp variables here just so we don't have to reconstruct them over and over. Thus, + // they aren't really part of the state of this object. 
+ matrix<scalar_type,0,1,mem_manager_type> a; + matrix<scalar_type,0,1,mem_manager_type> k; + + }; + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + void swap(kcentroid<kernel_type>& a, kcentroid<kernel_type>& b) + { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KCENTROId_ + diff --git a/ml/dlib/dlib/svm/kcentroid_abstract.h b/ml/dlib/dlib/svm/kcentroid_abstract.h new file mode 100644 index 000000000..44b94c813 --- /dev/null +++ b/ml/dlib/dlib/svm/kcentroid_abstract.h @@ -0,0 +1,339 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_KCENTROId_ABSTRACT_ +#ifdef DLIB_KCENTROId_ABSTRACT_ + +#include "../algs.h" +#include "../serialize.h" +#include "kernel_abstract.h" + +namespace dlib +{ + + template < + typename kernel_type + > + class kcentroid + { + /*! + REQUIREMENTS ON kernel_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + INITIAL VALUE + - dictionary_size() == 0 + - samples_trained() == 0 + + WHAT THIS OBJECT REPRESENTS + This object represents a weighted sum of sample points in a kernel induced + feature space. It can be used to kernelize any algorithm that requires only + the ability to perform vector addition, subtraction, scalar multiplication, + and inner products. + + An example use of this object is as an online algorithm for recursively estimating + the centroid of a sequence of training points. This object then allows you to + compute the distance between the centroid and any test points. So you can use + this object to predict how similar a test point is to the data this object has + been trained on (larger distances from the centroid indicate dissimilarity/anomalous + points). 
+ + Also note that the algorithm internally keeps a set of "dictionary vectors" + that are used to represent the centroid. You can force the algorithm to use + no more than a set number of vectors by setting the 3rd constructor argument + to whatever you want. + + This object uses the sparsification technique described in the paper The + Kernel Recursive Least Squares Algorithm by Yaakov Engel. This technique + allows us to keep the number of dictionary vectors down to a minimum. In fact, + the object has a user selectable tolerance parameter that controls the trade off + between accuracy and number of stored dictionary vectors. + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + kcentroid ( + ); + /*! + ensures + - this object is properly initialized + - #tolerance() == 0.001 + - #get_kernel() == kernel_type() (i.e. whatever the kernel's default value is) + - #max_dictionary_size() == 1000000 + - #remove_oldest_first() == false + !*/ + + explicit kcentroid ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000, + bool remove_oldest_first_ = false + ); + /*! + requires + - tolerance > 0 + - max_dictionary_size_ > 1 + ensures + - this object is properly initialized + - #tolerance() == tolerance_ + - #get_kernel() == kernel_ + - #max_dictionary_size() == max_dictionary_size_ + - #remove_oldest_first() == remove_oldest_first_ + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a const reference to the kernel used by this object + !*/ + + unsigned long max_dictionary_size( + ) const; + /*! + ensures + - returns the maximum number of dictionary vectors this object will + use at a time. That is, dictionary_size() will never be greater + than max_dictionary_size(). + !*/ + + bool remove_oldest_first ( + ) const; + /*! 
+        ensures
+            - When the maximum dictionary size is reached this object sometimes
+              needs to discard dictionary vectors when new samples are added via
+              one of the train functions.  When this happens this object chooses
+              the dictionary vector to discard based on the setting of the
+              remove_oldest_first() parameter.
+            - if (remove_oldest_first() == true) then
+                - This object discards the oldest dictionary vectors when necessary.
+                  This is an appropriate mode when using this object in an online
+                  setting and the input training samples come from a slowly
+                  varying distribution.
+            - else (remove_oldest_first() == false) then
+                - This object discards the most linearly dependent dictionary vectors
+                  when necessary.  This is the default behavior and should be used
+                  in most cases.
+        !*/
+
+        unsigned long dictionary_size (
+        ) const;
+        /*!
+            ensures
+                - returns the number of basis vectors in the dictionary.  These are
+                  the basis vectors used by this object to represent a point in kernel
+                  feature space.
+        !*/
+
+        scalar_type samples_trained (
+        ) const;
+        /*!
+            ensures
+                - returns the number of samples this object has been trained on so far
+        !*/
+
+        scalar_type tolerance(
+        ) const;
+        /*!
+            ensures
+                - returns the tolerance to use for the approximately linearly dependent
+                  test used for sparsification (see the KRLS paper for details).  This is
+                  a number which governs how accurately this object will approximate the
+                  centroid it is learning.  Smaller values generally result in a more
+                  accurate estimate while also resulting in a bigger set of vectors in
+                  the dictionary.  Bigger tolerance values result in a less accurate
+                  estimate but also in fewer dictionary vectors.  (Note that in any case,
+                  the max_dictionary_size() limits the number of dictionary vectors no
+                  matter the setting of the tolerance)
+                - The exact meaning of the tolerance parameter is the following:
+                  Imagine that we have an empirical_kernel_map that contains all
+                  the current dictionary vectors.
Then the tolerance is the minimum
+                  projection error (as given by empirical_kernel_map::project()) required
+                  to cause us to include a new vector in the dictionary.  So each time
+                  you call train() the kcentroid basically just computes the projection
+                  error for that new sample and if it is larger than the tolerance
+                  then that new sample becomes part of the dictionary.
+        !*/
+
+        void clear_dictionary (
+        );
+        /*!
+            ensures
+                - clears out all learned data (e.g. #dictionary_size() == 0)
+                - #samples_trained() == 0
+        !*/
+
+        scalar_type operator() (
+            const kcentroid& x
+        ) const;
+        /*!
+            requires
+                - x.get_kernel() == get_kernel()
+            ensures
+                - returns the distance in kernel feature space between this centroid and the
+                  centroid represented by x.
+        !*/
+
+        scalar_type operator() (
+            const sample_type& x
+        ) const;
+        /*!
+            ensures
+                - returns the distance in kernel feature space between the sample x and the
+                  current estimate of the centroid of the training samples given
+                  to this object so far.
+        !*/
+
+        scalar_type inner_product (
+            const sample_type& x
+        ) const;
+        /*!
+            ensures
+                - returns the inner product of the given x point and the current
+                  estimate of the centroid of the training samples given to this object
+                  so far.
+        !*/
+
+        scalar_type inner_product (
+            const kcentroid& x
+        ) const;
+        /*!
+            requires
+                - x.get_kernel() == get_kernel()
+            ensures
+                - returns the inner product between x and this centroid object.
+        !*/
+
+        scalar_type squared_norm (
+        ) const;
+        /*!
+            ensures
+                - returns the squared norm of the centroid vector represented by this
+                  object.  I.e. returns this->inner_product(*this)
+        !*/
+
+        void train (
+            const sample_type& x
+        );
+        /*!
+            ensures
+                - adds the sample x into the current estimate of the centroid
+                - also note that calling this function is equivalent to calling
+                  train(x, samples_trained()/(samples_trained()+1.0), 1.0/(samples_trained()+1.0)).
+                  That is, this function finds the normal unweighted centroid of all training points.
+ !*/ + + void train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ); + /*! + ensures + - adds the sample x into the current estimate of the centroid but + uses a user given scale. That is, this function performs: + - new_centroid = cscale*old_centroid + xscale*x + - This function allows you to weight different samples however + you want. + !*/ + + void scale_by ( + scalar_type cscale + ); + /*! + ensures + - multiplies the current centroid vector by the given scale value. + This function is equivalent to calling train(some_x_value, cscale, 0). + So it performs: + - new_centroid == cscale*old_centroid + !*/ + + scalar_type test_and_train ( + const sample_type& x + ); + /*! + ensures + - calls train(x) + - returns (*this)(x) + - The reason this function exists is because train() and operator() + both compute some of the same things. So this function is more efficient + than calling both individually. + !*/ + + scalar_type test_and_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ); + /*! + ensures + - calls train(x,cscale,xscale) + - returns (*this)(x) + - The reason this function exists is because train() and operator() + both compute some of the same things. So this function is more efficient + than calling both individually. + !*/ + + void swap ( + kcentroid& item + ); + /*! + ensures + - swaps *this with item + !*/ + + distance_function<kernel_type> get_distance_function ( + ) const; + /*! + ensures + - returns a distance function F that represents the point learned + by this object so far. I.e. it is the case that: + - for all x: F(x) == (*this)(x) + !*/ + + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type + > + void swap( + kcentroid<kernel_type>& a, + kcentroid<kernel_type>& b + ) { a.swap(b); } + /*! 
+ provides a global swap function + !*/ + + template < + typename kernel_type + > + void serialize ( + const kcentroid<kernel_type>& item, + std::ostream& out + ); + /*! + provides serialization support for kcentroid objects + !*/ + + template < + typename kernel_type + > + void deserialize ( + kcentroid<kernel_type>& item, + std::istream& in + ); + /*! + provides serialization support for kcentroid objects + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KCENTROId_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/kcentroid_overloads.h b/ml/dlib/dlib/svm/kcentroid_overloads.h new file mode 100644 index 000000000..9c39f3d78 --- /dev/null +++ b/ml/dlib/dlib/svm/kcentroid_overloads.h @@ -0,0 +1,1324 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_KCENTROId_OVERLOADS_ +#define DLIB_KCENTROId_OVERLOADS_ + +#include "kcentroid_abstract.h" +#include "sparse_kernel.h" +#include "sparse_vector.h" +#include <map> + +namespace dlib +{ + /* + This file contains optimized overloads of the kcentroid object for the following + linear cases: + kcentroid<linear_kernel<T>> + kcentroid<sparse_linear_kernel<T>> + kcentroid<offset_kernel<linear_kernel<T>>> + kcentroid<offset_kernel<sparse_linear_kernel<T>>> + */ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Overloads for when kernel_type == linear_kernel +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template <typename T> + class kcentroid<linear_kernel<T> > + { + + + typedef linear_kernel<T> kernel_type; + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename 
kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + + explicit kcentroid ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000, + bool remove_oldest_first_ = false + ) : + my_remove_oldest_first(remove_oldest_first_), + kernel(kernel_), + my_tolerance(tolerance_), + my_max_dictionary_size(max_dictionary_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, + "\tkcentroid::kcentroid()" + << "\n\t You have to give a positive tolerance" + << "\n\t this: " << this + << "\n\t tolerance_: " << tolerance_ + << "\n\t max_dictionary_size_: " << max_dictionary_size_ + ); + + clear_dictionary(); + } + + scalar_type tolerance() const { return my_tolerance; } + unsigned long max_dictionary_size() const { return my_max_dictionary_size; } + bool remove_oldest_first () const { return my_remove_oldest_first; } + const kernel_type& get_kernel () const { return kernel; } + scalar_type samples_trained () const { return samples_seen; } + + void clear_dictionary () + { + samples_seen = 0; + set_all_elements(w, 0); + alpha = 0; + } + + scalar_type operator() ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::operator()(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + if (w.size() > 0) + { + if (x.w.size() > 0) + return length(alpha*w - x.alpha*x.w); + else + return alpha*length(w); + } + else + { + if (x.w.size() > 0) + return x.alpha*length(x.w); + else + return 0; + } + } + + scalar_type inner_product ( + const sample_type& x + ) const + { + if (w.size() > 0) + return alpha*trans(w)*x; + else + return 0; + } + + scalar_type inner_product ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + 
DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::inner_product(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + if (w.size() > 0 && x.w.size() > 0) + return alpha*x.alpha*trans(w)*x.w; + else + return 0; + } + + scalar_type squared_norm ( + ) const + { + if (w.size() > 0) + return alpha*alpha*trans(w)*w; + else + return 0; + } + + scalar_type operator() ( + const sample_type& x + ) const + { + if (w.size() > 0) + return length(x-alpha*w); + else + return length(x); + } + + scalar_type test_and_train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + } + + scalar_type test_and_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void scale_by ( + scalar_type cscale + ) + { + alpha *= cscale; + } + + void train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + do_train(x, cscale, xscale); + } + + void swap ( + kcentroid& item + ) + { + exchange(my_remove_oldest_first, item.my_remove_oldest_first); + exchange(kernel, item.kernel); + exchange(w, item.w); + exchange(alpha, item.alpha); + exchange(my_tolerance, item.my_tolerance); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + exchange(samples_seen, item.samples_seen); + } + + unsigned long dictionary_size ( + ) const + { + if (samples_seen > 0) + return 1; + else + return 0; + } + + friend void serialize(const kcentroid& item, std::ostream& out) + { + serialize(item.my_remove_oldest_first, out); + 
serialize(item.kernel, out); + serialize(item.w, out); + serialize(item.alpha, out); + serialize(item.my_tolerance, out); + serialize(item.my_max_dictionary_size, out); + serialize(item.samples_seen, out); + } + + friend void deserialize(kcentroid& item, std::istream& in) + { + deserialize(item.my_remove_oldest_first, in); + deserialize(item.kernel, in); + deserialize(item.w, in); + deserialize(item.alpha, in); + deserialize(item.my_tolerance, in); + deserialize(item.my_max_dictionary_size, in); + deserialize(item.samples_seen, in); + } + + distance_function<kernel_type> get_distance_function ( + ) const + { + if (samples_seen > 0) + { + typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; + typename distance_function<kernel_type>::scalar_vector_type temp_alpha; + + temp_basis_vectors.set_size(1); + temp_basis_vectors(0) = w; + temp_alpha.set_size(1); + temp_alpha(0) = alpha; + + return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); + } + else + { + return distance_function<kernel_type>(kernel); + } + } + + private: + + void do_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + set_size_of_w(x); + + const scalar_type temp = cscale*alpha; + + if (temp != 0) + { + w = w + xscale*x/temp; + alpha = temp; + } + else + { + w = cscale*alpha*w + xscale*x; + alpha = 1; + } + } + + void set_size_of_w ( + const sample_type& x + ) + { + if (x.size() != w.size()) + { + w.set_size(x.nr(), x.nc()); + set_all_elements(w, 0); + alpha = 0; + } + } + + bool my_remove_oldest_first; + + kernel_type kernel; + + sample_type w; + scalar_type alpha; + + + scalar_type my_tolerance; + unsigned long my_max_dictionary_size; + scalar_type samples_seen; + + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Overloads for when kernel_type == 
offset_kernel<linear_kernel> +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template <typename T> + class kcentroid<offset_kernel<linear_kernel<T> > > + { + + /*! + INITIAL VALUE + - x_extra == sqrt(kernel.offset) + + CONVENTION + - x_extra == sqrt(kernel.offset) + - w_extra == the value of the extra dimension tacked onto the + end of the w vector + !*/ + + typedef offset_kernel<linear_kernel<T> > kernel_type; + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + + explicit kcentroid ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000, + bool remove_oldest_first_ = false + ) : + my_remove_oldest_first(remove_oldest_first_), + kernel(kernel_), + my_tolerance(tolerance_), + my_max_dictionary_size(max_dictionary_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, + "\tkcentroid::kcentroid()" + << "\n\t You have to give a positive tolerance" + << "\n\t this: " << this + << "\n\t tolerance_: " << tolerance_ + << "\n\t max_dictionary_size_: " << max_dictionary_size_ + ); + + x_extra = std::sqrt(kernel.offset); + + clear_dictionary(); + } + + scalar_type tolerance() const { return my_tolerance; } + unsigned long max_dictionary_size() const { return my_max_dictionary_size; } + bool remove_oldest_first () const { return my_remove_oldest_first; } + const kernel_type& get_kernel () const { return kernel; } + scalar_type samples_trained () const { return samples_seen; } + + void clear_dictionary () + { + samples_seen = 0; + set_all_elements(w, 0); + alpha = 0; + w_extra = x_extra; + } + + scalar_type operator() ( + const kcentroid& x + ) const + { + // make sure requires 
clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::operator()(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + if (w.size() > 0) + { + if (x.w.size() > 0) + { + scalar_type temp1 = length_squared(alpha*w - x.alpha*x.w); + scalar_type temp2 = alpha*w_extra - x.alpha*x.w_extra; + return std::sqrt(temp1 + temp2*temp2); + } + else + { + return alpha*std::sqrt(length_squared(w) + w_extra*w_extra); + } + } + else + { + if (x.w.size() > 0) + return x.alpha*std::sqrt(length_squared(x.w) + x.w_extra*x.w_extra); + else + return 0; + } + } + + scalar_type inner_product ( + const sample_type& x + ) const + { + if (w.size() > 0) + return alpha*(trans(w)*x + w_extra*x_extra); + else + return 0; + } + + scalar_type inner_product ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::inner_product(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + if (w.size() > 0 && x.w.size() > 0) + return alpha*x.alpha*(trans(w)*x.w + w_extra*x.w_extra); + else + return 0; + } + + scalar_type squared_norm ( + ) const + { + if (w.size() > 0) + return alpha*alpha*(trans(w)*w + w_extra*w_extra); + else + return 0; + } + + scalar_type operator() ( + const sample_type& x + ) const + { + if (w.size() > 0) + { + scalar_type temp1 = length_squared(x-alpha*w); + scalar_type temp2 = x_extra - alpha*w_extra; + return std::sqrt(temp1 + temp2*temp2); + } + else + { + return std::sqrt(length_squared(x) + x_extra*x_extra); + } + } + + scalar_type test_and_train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void train ( + const sample_type& x + 
) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + } + + scalar_type test_and_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void scale_by ( + scalar_type cscale + ) + { + alpha *= cscale; + w_extra *= cscale; + } + + void train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + do_train(x, cscale, xscale); + } + + void swap ( + kcentroid& item + ) + { + exchange(my_remove_oldest_first, item.my_remove_oldest_first); + exchange(kernel, item.kernel); + exchange(w, item.w); + exchange(alpha, item.alpha); + exchange(w_extra, item.w_extra); + exchange(x_extra, item.x_extra); + exchange(my_tolerance, item.my_tolerance); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + exchange(samples_seen, item.samples_seen); + } + + unsigned long dictionary_size ( + ) const + { + if (samples_seen > 0) + { + if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) + return 1; + else + return 2; + } + else + return 0; + } + + friend void serialize(const kcentroid& item, std::ostream& out) + { + serialize(item.my_remove_oldest_first, out); + serialize(item.kernel, out); + serialize(item.w, out); + serialize(item.alpha, out); + serialize(item.w_extra, out); + serialize(item.x_extra, out); + serialize(item.my_tolerance, out); + serialize(item.my_max_dictionary_size, out); + serialize(item.samples_seen, out); + } + + friend void deserialize(kcentroid& item, std::istream& in) + { + deserialize(item.my_remove_oldest_first, in); + deserialize(item.kernel, in); + deserialize(item.w, in); + deserialize(item.alpha, in); + deserialize(item.w_extra, in); + deserialize(item.x_extra, in); + deserialize(item.my_tolerance, in); + deserialize(item.my_max_dictionary_size, in); + deserialize(item.samples_seen, in); + } + + 
distance_function<kernel_type> get_distance_function ( + ) const + { + + if (samples_seen > 0) + { + typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; + typename distance_function<kernel_type>::scalar_vector_type temp_alpha; + + // What we are doing here needs a bit of explanation. The w vector + // has an implicit extra dimension tacked on to it with the value of w_extra. + // The kernel we are using takes normal vectors and implicitly tacks the value + // x_extra onto their end. So what we are doing here is scaling w so that + // the value it should have tacked onto it is x_scale. Note that we also + // adjust alpha so that the combination of alpha*w stays the same. + scalar_type scale; + + // if w_extra is basically greater than 0 + if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) + { + scale = (x_extra/w_extra); + temp_basis_vectors.set_size(1); + temp_alpha.set_size(1); + temp_basis_vectors(0) = w*scale; + temp_alpha(0) = alpha/scale; + } + else + { + // In this case w_extra is zero. 
So the only way we can get the same + // thing in the output basis vector set is by using two vectors + temp_basis_vectors.set_size(2); + temp_alpha.set_size(2); + temp_basis_vectors(0) = 2*w; + temp_alpha(0) = alpha; + temp_basis_vectors(1) = w; + temp_alpha(1) = -alpha; + } + + + return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); + } + else + { + return distance_function<kernel_type>(kernel); + } + } + + private: + + void do_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + set_size_of_w(x); + + const scalar_type temp = cscale*alpha; + + if (temp != 0) + { + w = w + xscale*x/temp; + w_extra = w_extra + xscale*x_extra/temp; + alpha = temp; + } + else + { + w = cscale*alpha*w + xscale*x; + w_extra = cscale*alpha*w_extra + xscale*x_extra; + alpha = 1; + } + } + + void set_size_of_w ( + const sample_type& x + ) + { + if (x.size() != w.size()) + { + w.set_size(x.nr(), x.nc()); + set_all_elements(w, 0); + alpha = 0; + w_extra = x_extra; + } + } + + bool my_remove_oldest_first; + + kernel_type kernel; + + sample_type w; + scalar_type alpha; + + scalar_type w_extra; + scalar_type x_extra; + + + scalar_type my_tolerance; + unsigned long my_max_dictionary_size; + scalar_type samples_seen; + + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Overloads for when kernel_type == sparse_linear_kernel +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template <typename T> + class kcentroid<sparse_linear_kernel<T> > + { + + + typedef sparse_linear_kernel<T> kernel_type; + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename 
kernel_type::mem_manager_type mem_manager_type; + + + explicit kcentroid ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000, + bool remove_oldest_first_ = false + ) : + my_remove_oldest_first(remove_oldest_first_), + kernel(kernel_), + my_tolerance(tolerance_), + my_max_dictionary_size(max_dictionary_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, + "\tkcentroid::kcentroid()" + << "\n\t You have to give a positive tolerance" + << "\n\t this: " << this + << "\n\t tolerance_: " << tolerance_ + << "\n\t max_dictionary_size_: " << max_dictionary_size_ + ); + + clear_dictionary(); + } + + scalar_type tolerance() const { return my_tolerance; } + unsigned long max_dictionary_size() const { return my_max_dictionary_size; } + bool remove_oldest_first () const { return my_remove_oldest_first; } + const kernel_type& get_kernel () const { return kernel; } + scalar_type samples_trained () const { return samples_seen; } + + void clear_dictionary () + { + samples_seen = 0; + w.clear(); + alpha = 0; + } + + scalar_type operator() ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::operator()(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + return distance(alpha,w , x.alpha,x.w); + } + + scalar_type inner_product ( + const sample_type& x + ) const + { + return alpha*dot(w,x); + } + + scalar_type inner_product ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::inner_product(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + return alpha*x.alpha*dot(w,x.w); + } + + scalar_type 
squared_norm ( + ) const + { + return alpha*alpha*length_squared(w); + } + + scalar_type operator() ( + const sample_type& x + ) const + { + return distance(static_cast<scalar_type>(1), x, alpha, w); + } + + scalar_type test_and_train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + } + + scalar_type test_and_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void scale_by ( + scalar_type cscale + ) + { + alpha *= cscale; + } + + void train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + do_train(x, cscale, xscale); + } + + void swap ( + kcentroid& item + ) + { + exchange(my_remove_oldest_first, item.my_remove_oldest_first); + exchange(kernel, item.kernel); + exchange(w, item.w); + exchange(alpha, item.alpha); + exchange(my_tolerance, item.my_tolerance); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + exchange(samples_seen, item.samples_seen); + } + + unsigned long dictionary_size ( + ) const + { + if (samples_seen > 0) + return 1; + else + return 0; + } + + friend void serialize(const kcentroid& item, std::ostream& out) + { + serialize(item.my_remove_oldest_first, out); + serialize(item.kernel, out); + serialize(item.w, out); + serialize(item.alpha, out); + serialize(item.my_tolerance, out); + serialize(item.my_max_dictionary_size, out); + serialize(item.samples_seen, out); + } + + friend void deserialize(kcentroid& item, std::istream& in) + { + deserialize(item.my_remove_oldest_first, in); + deserialize(item.kernel, in); + deserialize(item.w, in); + 
deserialize(item.alpha, in); + deserialize(item.my_tolerance, in); + deserialize(item.my_max_dictionary_size, in); + deserialize(item.samples_seen, in); + } + + distance_function<kernel_type> get_distance_function ( + ) const + { + if (samples_seen > 0) + { + typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; + typename distance_function<kernel_type>::scalar_vector_type temp_alpha; + + temp_basis_vectors.set_size(1); + temp_basis_vectors(0) = sample_type(w.begin(), w.end()); + temp_alpha.set_size(1); + temp_alpha(0) = alpha; + + return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); + } + else + { + return distance_function<kernel_type>(kernel); + } + } + + private: + + void do_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + const scalar_type temp = cscale*alpha; + + if (temp != 0) + { + // compute w += xscale*x/temp + typename sample_type::const_iterator i; + for (i = x.begin(); i != x.end(); ++i) + { + w[i->first] += xscale*(i->second)/temp; + } + + alpha = temp; + } + else + { + // first compute w = cscale*alpha*w + for (typename std::map<unsigned long,scalar_type>::iterator i = w.begin(); i != w.end(); ++i) + { + i->second *= cscale*alpha; + } + + // now compute w += xscale*x + for (typename sample_type::const_iterator i = x.begin(); i != x.end(); ++i) + { + w[i->first] += xscale*(i->second); + } + + alpha = 1; + } + } + + bool my_remove_oldest_first; + + kernel_type kernel; + + std::map<unsigned long,scalar_type> w; + scalar_type alpha; + + + scalar_type my_tolerance; + unsigned long my_max_dictionary_size; + scalar_type samples_seen; + + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Overloads for when kernel_type == offset_kernel<sparse_linear_kernel> +// 
---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template <typename T> + class kcentroid<offset_kernel<sparse_linear_kernel<T> > > + { + + /*! + INITIAL VALUE + - x_extra == sqrt(kernel.offset) + + CONVENTION + - x_extra == sqrt(kernel.offset) + - w_extra == the value of the extra dimension tacked onto the + end of the w vector + !*/ + + typedef offset_kernel<sparse_linear_kernel<T> > kernel_type; + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + + explicit kcentroid ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000, + bool remove_oldest_first_ = false + ) : + my_remove_oldest_first(remove_oldest_first_), + kernel(kernel_), + my_tolerance(tolerance_), + my_max_dictionary_size(max_dictionary_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(tolerance_ >= 0 && max_dictionary_size_ > 0, + "\tkcentroid::kcentroid()" + << "\n\t You have to give a positive tolerance" + << "\n\t this: " << this + << "\n\t tolerance_: " << tolerance_ + << "\n\t max_dictionary_size_: " << max_dictionary_size_ + ); + + x_extra = std::sqrt(kernel.offset); + + clear_dictionary(); + } + + scalar_type tolerance() const { return my_tolerance; } + unsigned long max_dictionary_size() const { return my_max_dictionary_size; } + bool remove_oldest_first () const { return my_remove_oldest_first; } + const kernel_type& get_kernel () const { return kernel; } + scalar_type samples_trained () const { return samples_seen; } + + void clear_dictionary () + { + samples_seen = 0; + w.clear(); + alpha = 0; + w_extra = x_extra; + } + + scalar_type operator() ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + 
DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::operator()(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + if (samples_seen > 0) + { + scalar_type temp1 = distance_squared(alpha,w , x.alpha,x.w); + scalar_type temp2 = alpha*w_extra - x.alpha*x.w_extra; + return std::sqrt(temp1 + temp2*temp2); + } + else + { + return 0; + } + } + + scalar_type inner_product ( + const sample_type& x + ) const + { + if (samples_seen > 0) + return alpha*(dot(w,x) + w_extra*x_extra); + else + return 0; + } + + scalar_type inner_product ( + const kcentroid& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(x.get_kernel() == get_kernel(), + "\tscalar_type kcentroid::inner_product(const kcentroid& x)" + << "\n\tYou can only compare two kcentroid objects if they use the same kernel" + << "\n\tthis: " << this + ); + + if (samples_seen > 0 && x.samples_seen > 0) + return alpha*x.alpha*(dot(w,x.w) + w_extra*x.w_extra); + else + return 0; + } + + scalar_type squared_norm ( + ) const + { + if (samples_seen > 0) + return alpha*alpha*(length_squared(w) + w_extra*w_extra); + else + return 0; + } + + scalar_type operator() ( + const sample_type& x + ) const + { + if (samples_seen > 0) + { + scalar_type temp1 = distance_squared(1,x,alpha,w); + scalar_type temp2 = x_extra - alpha*w_extra; + return std::sqrt(temp1 + temp2*temp2); + } + else + { + return std::sqrt(length_squared(x) + x_extra*x_extra); + } + } + + scalar_type test_and_train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void train ( + const sample_type& x + ) + { + ++samples_seen; + const scalar_type xscale = 1/samples_seen; + const scalar_type cscale = 1-xscale; + + do_train(x, cscale, xscale); + } + + scalar_type test_and_train ( + const sample_type& 
x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + + do_train(x, cscale, xscale); + + return (*this)(x); + } + + void scale_by ( + scalar_type cscale + ) + { + alpha *= cscale; + w_extra *= cscale; + } + + void train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + ++samples_seen; + do_train(x, cscale, xscale); + } + + void swap ( + kcentroid& item + ) + { + exchange(my_remove_oldest_first, item.my_remove_oldest_first); + exchange(kernel, item.kernel); + exchange(w, item.w); + exchange(alpha, item.alpha); + exchange(w_extra, item.w_extra); + exchange(x_extra, item.x_extra); + exchange(my_tolerance, item.my_tolerance); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + exchange(samples_seen, item.samples_seen); + } + + unsigned long dictionary_size ( + ) const + { + if (samples_seen > 0) + { + if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) + return 1; + else + return 2; + } + else + { + return 0; + } + } + + friend void serialize(const kcentroid& item, std::ostream& out) + { + serialize(item.my_remove_oldest_first, out); + serialize(item.kernel, out); + serialize(item.w, out); + serialize(item.alpha, out); + serialize(item.w_extra, out); + serialize(item.x_extra, out); + serialize(item.my_tolerance, out); + serialize(item.my_max_dictionary_size, out); + serialize(item.samples_seen, out); + } + + friend void deserialize(kcentroid& item, std::istream& in) + { + deserialize(item.my_remove_oldest_first, in); + deserialize(item.kernel, in); + deserialize(item.w, in); + deserialize(item.alpha, in); + deserialize(item.w_extra, in); + deserialize(item.x_extra, in); + deserialize(item.my_tolerance, in); + deserialize(item.my_max_dictionary_size, in); + deserialize(item.samples_seen, in); + } + + distance_function<kernel_type> get_distance_function ( + ) const + { + if (samples_seen > 0) + { + typename distance_function<kernel_type>::sample_vector_type temp_basis_vectors; + typename 
distance_function<kernel_type>::scalar_vector_type temp_alpha; + + // What we are doing here needs a bit of explanation. The w vector + // has an implicit extra dimension tacked on to it with the value of w_extra. + // The kernel we are using takes normal vectors and implicitly tacks the value + // x_extra onto their end. So what we are doing here is scaling w so that + // the value it should have tacked onto it is x_scale. Note that we also + // adjust alpha so that the combination of alpha*w stays the same. + scalar_type scale; + + // if w_extra is basically greater than 0 + if (std::abs(w_extra) > std::numeric_limits<scalar_type>::epsilon()) + { + scale = (x_extra/w_extra); + temp_basis_vectors.set_size(1); + temp_alpha.set_size(1); + temp_basis_vectors(0) = sample_type(w.begin(), w.end()); + dlib::scale_by(temp_basis_vectors(0), scale); + temp_alpha(0) = alpha/scale; + } + else + { + // In this case w_extra is zero. So the only way we can get the same + // thing in the output basis vector set is by using two vectors + temp_basis_vectors.set_size(2); + temp_alpha.set_size(2); + temp_basis_vectors(0) = sample_type(w.begin(), w.end()); + dlib::scale_by(temp_basis_vectors(0), 2); + temp_alpha(0) = alpha; + temp_basis_vectors(1) = sample_type(w.begin(), w.end()); + temp_alpha(1) = -alpha; + } + + return distance_function<kernel_type>(temp_alpha, squared_norm(), kernel, temp_basis_vectors); + + } + else + { + return distance_function<kernel_type>(kernel); + } + + } + + private: + + void do_train ( + const sample_type& x, + scalar_type cscale, + scalar_type xscale + ) + { + + const scalar_type temp = cscale*alpha; + + if (temp != 0) + { + // compute w += xscale*x/temp + typename sample_type::const_iterator i; + for (i = x.begin(); i != x.end(); ++i) + { + w[i->first] += xscale*(i->second)/temp; + } + + w_extra = w_extra + xscale*x_extra/temp; + alpha = temp; + } + else + { + // first compute w = cscale*alpha*w + for (typename std::map<unsigned 
long,scalar_type>::iterator i = w.begin(); i != w.end(); ++i) + { + i->second *= cscale*alpha; + } + + // now compute w += xscale*x + for (typename sample_type::const_iterator i = x.begin(); i != x.end(); ++i) + { + w[i->first] += xscale*(i->second); + } + + + w_extra = cscale*alpha*w_extra + xscale*x_extra; + alpha = 1; + } + } + + bool my_remove_oldest_first; + + kernel_type kernel; + + std::map<unsigned long,scalar_type> w; + scalar_type alpha; + + scalar_type w_extra; + scalar_type x_extra; + + + scalar_type my_tolerance; + unsigned long my_max_dictionary_size; + scalar_type samples_seen; + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KCENTROId_OVERLOADS_ + + diff --git a/ml/dlib/dlib/svm/kernel.h b/ml/dlib/dlib/svm/kernel.h new file mode 100644 index 000000000..907420986 --- /dev/null +++ b/ml/dlib/dlib/svm/kernel.h @@ -0,0 +1,569 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVm_KERNEL +#define DLIB_SVm_KERNEL + +#include "kernel_abstract.h" +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix.h" +#include "../algs.h" +#include "../serialize.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < typename kernel_type > struct kernel_derivative; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct radial_basis_kernel + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + // T must be capable of representing a column vector. 
+ COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); + + radial_basis_kernel(const scalar_type g) : gamma(g) {} + radial_basis_kernel() : gamma(0.1) {} + radial_basis_kernel( + const radial_basis_kernel& k + ) : gamma(k.gamma) {} + + + const scalar_type gamma; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + const scalar_type d = trans(a-b)*(a-b); + return std::exp(-gamma*d); + } + + radial_basis_kernel& operator= ( + const radial_basis_kernel& k + ) + { + const_cast<scalar_type&>(gamma) = k.gamma; + return *this; + } + + bool operator== ( + const radial_basis_kernel& k + ) const + { + return gamma == k.gamma; + } + }; + + template < + typename T + > + void serialize ( + const radial_basis_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.gamma, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type radial_basis_kernel"); + } + } + + template < + typename T + > + void deserialize ( + radial_basis_kernel<T>& item, + std::istream& in + ) + { + typedef typename T::type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.gamma), in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type radial_basis_kernel"); + } + } + + template < + typename T + > + struct kernel_derivative<radial_basis_kernel<T> > + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + kernel_derivative(const radial_basis_kernel<T>& k_) : k(k_){} + + const sample_type& operator() (const sample_type& x, const sample_type& y) const + { + // return the derivative of the rbf kernel + temp = 2*k.gamma*(x-y)*k(x,y); + return temp; + } + + const radial_basis_kernel<T>& k; + mutable sample_type temp; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + 
struct polynomial_kernel + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + // T must be capable of representing a column vector. + COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); + + polynomial_kernel(const scalar_type g, const scalar_type c, const scalar_type d) : gamma(g), coef(c), degree(d) {} + polynomial_kernel() : gamma(1), coef(0), degree(1) {} + polynomial_kernel( + const polynomial_kernel& k + ) : gamma(k.gamma), coef(k.coef), degree(k.degree) {} + + typedef T type; + const scalar_type gamma; + const scalar_type coef; + const scalar_type degree; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return std::pow(gamma*(trans(a)*b) + coef, degree); + } + + polynomial_kernel& operator= ( + const polynomial_kernel& k + ) + { + const_cast<scalar_type&>(gamma) = k.gamma; + const_cast<scalar_type&>(coef) = k.coef; + const_cast<scalar_type&>(degree) = k.degree; + return *this; + } + + bool operator== ( + const polynomial_kernel& k + ) const + { + return (gamma == k.gamma) && (coef == k.coef) && (degree == k.degree); + } + }; + + template < + typename T + > + void serialize ( + const polynomial_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.gamma, out); + serialize(item.coef, out); + serialize(item.degree, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type polynomial_kernel"); + } + } + + template < + typename T + > + void deserialize ( + polynomial_kernel<T>& item, + std::istream& in + ) + { + typedef typename T::type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.gamma), in); + deserialize(const_cast<scalar_type&>(item.coef), in); + deserialize(const_cast<scalar_type&>(item.degree), in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type polynomial_kernel"); + } + } + 
+ template < + typename T + > + struct kernel_derivative<polynomial_kernel<T> > + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + kernel_derivative(const polynomial_kernel<T>& k_) : k(k_){} + + const sample_type& operator() (const sample_type& x, const sample_type& y) const + { + // return the derivative of the rbf kernel + temp = k.degree*k.gamma*x*std::pow(k.gamma*(trans(x)*y) + k.coef, k.degree-1); + return temp; + } + + const polynomial_kernel<T>& k; + mutable sample_type temp; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct sigmoid_kernel + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + // T must be capable of representing a column vector. + COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); + + sigmoid_kernel(const scalar_type g, const scalar_type c) : gamma(g), coef(c) {} + sigmoid_kernel() : gamma(0.1), coef(-1.0) {} + sigmoid_kernel( + const sigmoid_kernel& k + ) : gamma(k.gamma), coef(k.coef) {} + + typedef T type; + const scalar_type gamma; + const scalar_type coef; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return std::tanh(gamma*(trans(a)*b) + coef); + } + + sigmoid_kernel& operator= ( + const sigmoid_kernel& k + ) + { + const_cast<scalar_type&>(gamma) = k.gamma; + const_cast<scalar_type&>(coef) = k.coef; + return *this; + } + + bool operator== ( + const sigmoid_kernel& k + ) const + { + return (gamma == k.gamma) && (coef == k.coef); + } + }; + + template < + typename T + > + void serialize ( + const sigmoid_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.gamma, out); + serialize(item.coef, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type sigmoid_kernel"); + } + } 
+ + template < + typename T + > + void deserialize ( + sigmoid_kernel<T>& item, + std::istream& in + ) + { + typedef typename T::type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.gamma), in); + deserialize(const_cast<scalar_type&>(item.coef), in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type sigmoid_kernel"); + } + } + + template < + typename T + > + struct kernel_derivative<sigmoid_kernel<T> > + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + kernel_derivative(const sigmoid_kernel<T>& k_) : k(k_){} + + const sample_type& operator() (const sample_type& x, const sample_type& y) const + { + // return the derivative of the rbf kernel + temp = k.gamma*x*(1-std::pow(k(x,y),2)); + return temp; + } + + const sigmoid_kernel<T>& k; + mutable sample_type temp; + }; + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct linear_kernel + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + // T must be capable of representing a column vector. 
+ COMPILE_TIME_ASSERT(T::NC == 1 || T::NC == 0); + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return trans(a)*b; + } + + bool operator== ( + const linear_kernel& + ) const + { + return true; + } + }; + + template < + typename T + > + void serialize ( + const linear_kernel<T>& , + std::ostream& + ){} + + template < + typename T + > + void deserialize ( + linear_kernel<T>& , + std::istream& + ){} + + template < + typename T + > + struct kernel_derivative<linear_kernel<T> > + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + kernel_derivative(const linear_kernel<T>& k_) : k(k_){} + + const sample_type& operator() (const sample_type& x, const sample_type& ) const + { + return x; + } + + const linear_kernel<T>& k; + }; + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct histogram_intersection_kernel + { + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + scalar_type temp = 0; + for (long i = 0; i < a.size(); ++i) + { + temp += std::min(a(i), b(i)); + } + return temp; + } + + bool operator== ( + const histogram_intersection_kernel& + ) const + { + return true; + } + }; + + template < + typename T + > + void serialize ( + const histogram_intersection_kernel<T>& , + std::ostream& + ){} + + template < + typename T + > + void deserialize ( + histogram_intersection_kernel<T>& , + std::istream& + ){} + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct offset_kernel + { + typedef typename T::scalar_type scalar_type; + typedef typename T::sample_type sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + offset_kernel(const 
T& k, const scalar_type& offset_ + ) : kernel(k), offset(offset_) {} + offset_kernel() : kernel(T()), offset(0.01) {} + offset_kernel( + const offset_kernel& k + ) : kernel(k.kernel), offset(k.offset) {} + + const T kernel; + const scalar_type offset; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return kernel(a,b) + offset; + } + + offset_kernel& operator= ( + const offset_kernel& k + ) + { + const_cast<T&>(kernel) = k.kernel; + const_cast<scalar_type&>(offset) = k.offset; + return *this; + } + + bool operator== ( + const offset_kernel& k + ) const + { + return k.kernel == kernel && offset == k.offset; + } + }; + + template < + typename T + > + void serialize ( + const offset_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.offset, out); + serialize(item.kernel, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type offset_kernel"); + } + } + + template < + typename T + > + void deserialize ( + offset_kernel<T>& item, + std::istream& in + ) + { + typedef typename offset_kernel<T>::scalar_type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.offset), in); + deserialize(const_cast<T&>(item.kernel), in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type offset_kernel"); + } + } + + template < + typename T + > + struct kernel_derivative<offset_kernel<T> > + { + typedef typename T::scalar_type scalar_type; + typedef typename T::sample_type sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + kernel_derivative(const offset_kernel<T>& k) : der(k.kernel){} + + const sample_type operator() (const sample_type& x, const sample_type& y) const + { + return der(x,y); + } + + kernel_derivative<T> der; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_KERNEL + + diff 
--git a/ml/dlib/dlib/svm/kernel_abstract.h b/ml/dlib/dlib/svm/kernel_abstract.h new file mode 100644 index 000000000..f72430eb8 --- /dev/null +++ b/ml/dlib/dlib/svm/kernel_abstract.h @@ -0,0 +1,681 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_KERNEL_ABSTRACT_ +#ifdef DLIB_SVm_KERNEL_ABSTRACT_ + +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +/*!A Kernel_Function_Objects */ +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + /*! + WHAT IS A KERNEL FUNCTION OBJECT? + In the context of the dlib library documentation a kernel function object + is an object with an interface with the following properties: + - a public typedef named sample_type + - a public typedef named scalar_type which should be a float, double, or + long double type. + - an overloaded operator() that operates on two items of sample_type + and returns a scalar_type. + (e.g. scalar_type val = kernel_function(sample1,sample2); + would be a valid expression) + - a public typedef named mem_manager_type that is an implementation of + dlib/memory_manager/memory_manager_kernel_abstract.h or + dlib/memory_manager_global/memory_manager_global_kernel_abstract.h or + dlib/memory_manager_stateless/memory_manager_stateless_kernel_abstract.h + - an overloaded == operator that tells you if two kernels are + identical or not. 
+ + THREAD SAFETY + For a kernel function to be threadsafe it means that it must be safe to + evaluate an expression like val = kernel_function(sample1,sample2) + simultaneously from multiple threads, even when the threads operate on the same + object instances (i.e. kernel_function, sample1, and sample2). The most common + way to make this safe is to ensure that the kernel function does not mutate any + data, either in itself or in its arguments. + + For examples of kernel functions see the following objects + (e.g. the radial_basis_kernel). + !*/ + + template < + typename T + > + struct radial_basis_kernel + { + /*! + REQUIREMENTS ON T + T must be a dlib::matrix object + + WHAT THIS OBJECT REPRESENTS + This object represents a radial basis function kernel + + THREAD SAFETY + This kernel is threadsafe. + !*/ + + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + const scalar_type gamma; + + radial_basis_kernel( + ); + /*! + ensures + - #gamma == 0.1 + !*/ + + radial_basis_kernel( + const radial_basis_kernel& k + ); + /*! + ensures + - #gamma == k.gamma + !*/ + + radial_basis_kernel( + const scalar_type g + ); + /*! + ensures + - #gamma == g + !*/ + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const; + /*! + requires + - a.nc() == 1 + - b.nc() == 1 + - a.nr() == b.nr() + ensures + - returns exp(-gamma * ||a-b||^2) + !*/ + + radial_basis_kernel& operator= ( + const radial_basis_kernel& k + ); + /*! + ensures + - #gamma = k.gamma + - returns *this + !*/ + + bool operator== ( + const radial_basis_kernel& k + ) const; + /*! + ensures + - if (k and *this are identical) then + - returns true + - else + - returns false + !*/ + + }; + + template < + typename T + > + void serialize ( + const radial_basis_kernel<T>& item, + std::ostream& out + ); + /*! 
+ provides serialization support for radial_basis_kernel + !*/ + + template < + typename T + > + void deserialize ( + radial_basis_kernel<T>& item, + std::istream& in + ); + /*! + provides deserialization support for radial_basis_kernel + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct sigmoid_kernel + { + /*! + REQUIREMENTS ON T + T must be a dlib::matrix object + + WHAT THIS OBJECT REPRESENTS + This object represents a sigmoid kernel + + THREAD SAFETY + This kernel is threadsafe. + !*/ + + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + const scalar_type gamma; + const scalar_type coef; + + sigmoid_kernel( + ); + /*! + ensures + - #gamma == 0.1 + - #coef == -1.0 + !*/ + + sigmoid_kernel( + const sigmoid_kernel& k + ); + /*! + ensures + - #gamma == k.gamma + - #coef == k.coef + !*/ + + sigmoid_kernel( + const scalar_type g, + const scalar_type c + ); + /*! + ensures + - #gamma == g + - #coef == c + !*/ + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const; + /*! + requires + - a.nc() == 1 + - b.nc() == 1 + - a.nr() == b.nr() + ensures + - returns tanh(gamma*trans(a)*b + coef) + !*/ + + sigmoid_kernel& operator= ( + const sigmoid_kernel& k + ); + /*! + ensures + - #gamma = k.gamma + - #coef = k.coef + - returns *this + !*/ + + bool operator== ( + const sigmoid_kernel& k + ) const; + /*! + ensures + - if (k and *this are identical) then + - returns true + - else + - returns false + !*/ + }; + + template < + typename T + > + void serialize ( + const sigmoid_kernel<T>& item, + std::ostream& out + ); + /*! + provides serialization support for sigmoid_kernel + !*/ + + template < + typename T + > + void deserialize ( + sigmoid_kernel<T>& item, + std::istream& in + ); + /*! 
+ provides deserialization support for sigmoid_kernel + !*/ + + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct polynomial_kernel + { + /*! + REQUIREMENTS ON T + T must be a dlib::matrix object + + WHAT THIS OBJECT REPRESENTS + This object represents a polynomial kernel + + THREAD SAFETY + This kernel is threadsafe. + !*/ + + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + const scalar_type gamma; + const scalar_type coef; + const scalar_type degree; + + polynomial_kernel( + ); + /*! + ensures + - #gamma == 1 + - #coef == 0 + - #degree == 1 + !*/ + + polynomial_kernel( + const polynomial_kernel& k + ); + /*! + ensures + - #gamma == k.gamma + - #coef == k.coef + - #degree == k.degree + !*/ + + polynomial_kernel( + const scalar_type g, + const scalar_type c, + const scalar_type d + ); + /*! + ensures + - #gamma == g + - #coef == c + - #degree == d + !*/ + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const; + /*! + requires + - a.nc() == 1 + - b.nc() == 1 + - a.nr() == b.nr() + ensures + - returns pow(gamma*trans(a)*b + coef, degree) + !*/ + + polynomial_kernel& operator= ( + const polynomial_kernel& k + ); + /*! + ensures + - #gamma = k.gamma + - #coef = k.coef + - #degree = k.degree + - returns *this + !*/ + + bool operator== ( + const polynomial_kernel& k + ) const; + /*! + ensures + - if (k and *this are identical) then + - returns true + - else + - returns false + !*/ + }; + + template < + typename T + > + void serialize ( + const polynomial_kernel<T>& item, + std::ostream& out + ); + /*! + provides serialization support for polynomial_kernel + !*/ + + template < + typename T + > + void deserialize ( + polynomial_kernel<T>& item, + std::istream& in + ); + /*! 
+ provides deserialization support for polynomial_kernel + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct linear_kernel + { + /*! + REQUIREMENTS ON T + T must be a dlib::matrix object + + WHAT THIS OBJECT REPRESENTS + This object represents a linear function kernel + + THREAD SAFETY + This kernel is threadsafe. + !*/ + + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const; + /*! + requires + - a.nc() == 1 + - b.nc() == 1 + - a.nr() == b.nr() + ensures + - returns trans(a)*b + !*/ + + bool operator== ( + const linear_kernel& k + ) const; + /*! + ensures + - returns true + !*/ + }; + + template < + typename T + > + void serialize ( + const linear_kernel<T>& item, + std::ostream& out + ); + /*! + provides serialization support for linear_kernel + !*/ + + template < + typename T + > + void deserialize ( + linear_kernel<T>& item, + std::istream& in + ); + /*! + provides deserialization support for linear_kernel + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct histogram_intersection_kernel + { + /*! + REQUIREMENTS ON T + T must be a dlib::matrix object + + WHAT THIS OBJECT REPRESENTS + This object represents a histogram intersection kernel kernel + + THREAD SAFETY + This kernel is threadsafe. + !*/ + + typedef typename T::type scalar_type; + typedef T sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const; + /*! 
+ requires + - is_vector(a) + - is_vector(b) + - a.size() == b.size() + - min(a) >= 0 + - min(b) >= 0 + ensures + - returns sum over all i: std::min(a(i), b(i)) + !*/ + + bool operator== ( + const histogram_intersection_kernel& k + ) const; + /*! + ensures + - returns true + !*/ + }; + + template < + typename T + > + void serialize ( + const histogram_intersection_kernel<T>& item, + std::ostream& out + ); + /*! + provides serialization support for histogram_intersection_kernel + !*/ + + template < + typename T + > + void deserialize ( + histogram_intersection_kernel<T>& item, + std::istream& in + ); + /*! + provides deserialization support for histogram_intersection_kernel + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct offset_kernel + { + /*! + REQUIREMENTS ON T + T must be a kernel object (e.g. radial_basis_kernel, polynomial_kernel, etc.) + + WHAT THIS OBJECT REPRESENTS + This object represents a kernel with a fixed value offset + added to it. + + THREAD SAFETY + This kernel is threadsafe. + !*/ + + typedef typename T::scalar_type scalar_type; + typedef typename T::sample_type sample_type; + typedef typename T::mem_manager_type mem_manager_type; + + const T kernel; + const scalar_type offset; + + offset_kernel( + ); + /*! + ensures + - #offset == 0.01 + !*/ + + offset_kernel( + const offset_kernel& k + ); + /*! + ensures + - #offset == k.offset + - #kernel == k.kernel + !*/ + + offset_kernel( + const T& k, + const scalar_type& off + ); + /*! + ensures + - #kernel == k + - #offset == off + !*/ + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const; + /*! + ensures + - returns kernel(a,b) + offset + !*/ + + offset_kernel& operator= ( + const offset_kernel& k + ); + /*! + ensures + - #offset == k.offset + - #kernel == k.kernel + !*/ + + bool operator== ( + const offset_kernel& k + ) const; + /*! 
+ ensures + - if (k and *this are identical) then + - returns true + - else + - returns false + !*/ + }; + + template < + typename T + > + void serialize ( + const offset_kernel<T>& item, + std::ostream& out + ); + /*! + provides serialization support for offset_kernel + !*/ + + template < + typename T + > + void deserialize ( + offset_kernel<T>& item, + std::istream& in + ); + /*! + provides deserialization support for offset_kernel + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type + > + struct kernel_derivative + { + /*! + REQUIREMENTS ON kernel_type + kernel_type must be one of the following kernel types: + - radial_basis_kernel + - polynomial_kernel + - sigmoid_kernel + - linear_kernel + - offset_kernel + + WHAT THIS OBJECT REPRESENTS + This is a function object that computes the derivative of a kernel + function object. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + Instances of this object are allowed to have a mutable cache which is + used by const member functions. Therefore, it is not safe to use one + instance of this object from multiple threads (unless protected by a + mutex). + !*/ + + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + kernel_derivative( + const kernel_type& k_ + ); + /*! + ensures + - this object will return derivatives of the kernel object k_ + - #k == k_ + !*/ + + const sample_type operator() ( + const sample_type& x, + const sample_type& y + ) const; + /*! 
+ ensures + - returns the derivative of k with respect to y. + !*/ + + const kernel_type& k; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_KERNEL_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/kernel_matrix.h b/ml/dlib/dlib/svm/kernel_matrix.h new file mode 100644 index 000000000..f6e1e0b90 --- /dev/null +++ b/ml/dlib/dlib/svm/kernel_matrix.h @@ -0,0 +1,268 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVm_KERNEL_MATRIX_ +#define DLIB_SVm_KERNEL_MATRIX_ + +#include <vector> +#include "kernel_matrix_abstract.h" +#include "../matrix.h" +#include "../algs.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename kernel_type, typename T> + inline const typename T::type& access ( const matrix_exp<T>& m, long i) + { + return m(i); + } + + // bind to anything that looks like an array and isn't a matrix + template <typename kernel_type, typename T> + inline const typename disable_if<is_matrix<T>,typename T::type>::type& access ( const T& m, long i) + { + return m[i]; + } + + // Only use this function if T isn't a std::pair because in that case the entire vector is + // probably itself a sparse sample. + template <typename kernel_type, typename T, typename alloc> + inline typename disable_if<is_pair<T>,const T&>::type access ( const std::vector<T,alloc>& m, long i) + { + return m[i]; + } + + // Only use this function if T isn't a std::pair because in that case the entire vector is + // probably a sparse sample. 
+ template <typename kernel_type, typename T, typename alloc> + inline typename disable_if<is_pair<T>,const T&>::type access ( const std_vector_c<T,alloc>& m, long i) + { + return m[i]; + } + + template <typename kernel_type> + inline const typename kernel_type::sample_type& access ( + const typename kernel_type::sample_type& samp, + long + ) + { + return samp; + } + + // -------------------------------------------- + + template <typename kernel_type, typename T> + inline typename disable_if<is_same_type<T,typename kernel_type::sample_type>,unsigned long>::type + size ( const T& m) + { + return m.size(); + } + + template <typename kernel_type> + inline size_t size ( + const typename kernel_type::sample_type& + ) + { + return 1; + } + + // -------------------------------------------- + + template <typename T> + typename disable_if<is_matrix<T> >::type assert_is_vector(const T&) + {} + + template <typename T> + // This funny #ifdef thing is here because gcc sometimes gives a warning + // about v being unused otherwise. 
+#ifdef ENABLE_ASSERTS + void assert_is_vector(const matrix_exp<T>& v) +#else + void assert_is_vector(const matrix_exp<T>& ) +#endif + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(v) == true, + "\tconst matrix_exp kernel_matrix()" + << "\n\t You have to supply this function with row or column vectors" + << "\n\t v.nr(): " << v.nr() + << "\n\t v.nc(): " << v.nc() + ); + } + + } + + template <typename K, typename vect_type1, typename vect_type2> + struct op_kern_mat + { + op_kern_mat( + const K& kern_, + const vect_type1& vect1_, + const vect_type2& vect2_ + ) : + kern(kern_), + vect1(vect1_), + vect2(vect2_) + { + // make sure the requires clauses get checked eventually + impl::assert_is_vector(vect1); + impl::assert_is_vector(vect2); + } + + const K& kern; + const vect_type1& vect1; + const vect_type2& vect2; + + typedef typename K::scalar_type type; + + const static long cost = 100; + const static long NR = (is_same_type<vect_type1,typename K::sample_type>::value) ? 1 : 0; + const static long NC = (is_same_type<vect_type2,typename K::sample_type>::value) ? 1 : 0; + + typedef const type const_ret_type; + typedef typename K::mem_manager_type mem_manager_type; + typedef row_major_layout layout_type; + + const_ret_type apply (long r, long c ) const + { + return kern(impl::access<K>(vect1,r), impl::access<K>(vect2,c)); + } + + long nr () const { return impl::size<K>(vect1); } + long nc () const { return impl::size<K>(vect2); } + + template <typename U> bool aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } + template <typename U> bool destructively_aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } + + template <typename U> bool alias_helper ( const U& ) const { return false; } + + typedef typename K::sample_type samp_type; + + // Say we destructively alias if one of the vect* objects is actually item. 
+ bool alias_helper (const samp_type& item ) const { return are_same(item, vect1) || are_same(item, vect2); } + template <typename U> bool are_same (const samp_type& , const U& ) const { return false; } + bool are_same (const samp_type& a, const samp_type& b) const { return (&a == &b); } + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename K, + typename V1, + typename V2 + > + const matrix_op<op_kern_mat<K,V1,V2> > kernel_matrix ( + const K& kern, + const V1& v1, + const V2& v2 + ) + { + typedef op_kern_mat<K,V1,V2> op; + return matrix_op<op>(op(kern,v1,v2)); + } + +// ---------------------------------------------------------------------------------------- + + /* + It is possible to implement the kernel_matrix() operator with just one operator + class but treating the version that takes only a single vector separately + leads to more efficient output by gcc in certain instances. + */ + template <typename K, typename vect_type1> + struct op_kern_mat_single + { + op_kern_mat_single( + const K& kern_, + const vect_type1& vect1_ + ) : + kern(kern_), + vect1(vect1_) + { + // make sure the requires clauses get checked eventually + impl::assert_is_vector(vect1); + } + + const K& kern; + const vect_type1& vect1; + + typedef typename K::scalar_type type; + + const static long cost = 100; + const static long NR = (is_same_type<vect_type1,typename K::sample_type>::value) ? 1 : 0; + const static long NC = (is_same_type<vect_type1,typename K::sample_type>::value) ? 
1 : 0; + + typedef const type const_ret_type; + typedef typename K::mem_manager_type mem_manager_type; + typedef row_major_layout layout_type; + + const_ret_type apply (long r, long c ) const + { + return kern(impl::access<K>(vect1,r), impl::access<K>(vect1,c)); + } + + long nr () const { return impl::size<K>(vect1); } + long nc () const { return impl::size<K>(vect1); } + + template <typename U> bool aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } + template <typename U> bool destructively_aliases ( const matrix_exp<U>& item ) const { return alias_helper(item.ref()); } + + template <typename U> bool alias_helper ( const U& ) const { return false; } + + typedef typename K::sample_type samp_type; + + // Say we destructively alias if vect1 is actually item. + bool alias_helper (const samp_type& item ) const { return are_same(item, vect1); } + template <typename U> bool are_same (const samp_type& , const U& ) const { return false; } + bool are_same (const samp_type& a, const samp_type& b) const { return (&a == &b); } + }; + + template < + typename K, + typename V + > + const matrix_op<op_kern_mat_single<K,V> > kernel_matrix ( + const K& kern, + const V& v + ) + { + typedef op_kern_mat_single<K,V> op; + return matrix_op<op>(op(kern,v)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_dest_type, + typename K, + typename V + > + inline void matrix_assign ( + matrix_dest_type& dest, + const matrix_exp<matrix_op<op_kern_mat_single<K,V> > >& src + ) + /*! 
+ Overload matrix assignment so that when a kernel_matrix expression + gets assigned it only evaluates half the kernel matrix (since it is symmetric) + !*/ + { + for (long r = 0; r < src.nr(); ++r) + { + for (long c = r; c < src.nc(); ++c) + { + dest(r,c) = dest(c,r) = src(r,c); + } + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_KERNEL_MATRIX_ + diff --git a/ml/dlib/dlib/svm/kernel_matrix_abstract.h b/ml/dlib/dlib/svm/kernel_matrix_abstract.h new file mode 100644 index 000000000..4aa4b1ce2 --- /dev/null +++ b/ml/dlib/dlib/svm/kernel_matrix_abstract.h @@ -0,0 +1,115 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_KERNEL_MATRIX_ABSTRACT_ +#ifdef DLIB_SVm_KERNEL_MATRIX_ABSTRACT_ + +#include <vector> +#include "kernel_abstract.h" +#include "../matrix/matrix_abstract.h" +#include "../algs.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename V + > + const matrix_exp kernel_matrix ( + const kernel_type& kernel, + const V& v + ); + /*! + requires + - kernel == a kernel function object as defined by the file dlib/svm/kernel_abstract.h. + This kernel must also be capable of operating on the contents of v. + - V == dlib::matrix, std::vector, dlib::std_vector_c, dlib::random_subset_selector, + dlib::linearly_independent_subset_finder, or kernel_type::sample_type. 
+ - if (V is a dlib::matrix) then + - is_vector(v) == true + ensures + - if (V is of type kernel_type::sample_type) then + - returns a matrix R such that: + - R::type == kernel_type::scalar_type + - R.size() == 1 + - R(0,0) == kernel(v,v) + - else + - returns a matrix R such that: + - R::type == kernel_type::scalar_type + - R is a square matrix of v.size() rows by v.size() columns + - for all valid r and c: + - R(r,c) == kernel(v(r), v(c)) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename V1, + typename V2 + > + const matrix_exp kernel_matrix ( + const kernel_type& kernel, + const V1& v1, + const V2& v2 + ); + /*! + requires + - kernel == a kernel function object as defined by the file dlib/svm/kernel_abstract.h + This kernel must also be capable of operating on the contents of v1 and v2. + - V1 == dlib::matrix, std::vector, dlib::std_vector_c, dlib::random_subset_selector, + dlib::linearly_independent_subset_finder, or kernel_type::sample_type. + - V2 == dlib::matrix, std::vector, dlib::std_vector_c, dlib::random_subset_selector, + dlib::linearly_independent_subset_finder, or kernel_type::sample_type. 
+            aliasing through std::vectors or other "list of sample type" container class
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_KERNEL_MATRIX_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/kkmeans.h b/ml/dlib/dlib/svm/kkmeans.h new file mode 100644 index 000000000..4c72106d8 --- /dev/null +++ b/ml/dlib/dlib/svm/kkmeans.h @@ -0,0 +1,654 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_KKMEANs_ +#define DLIB_KKMEANs_ + +#include <cmath> +#include <vector> + +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include "kernel.h" +#include "../array.h" +#include "kcentroid.h" +#include "kkmeans_abstract.h" +#include "../noncopyable.h" + +namespace dlib +{ + + template < + typename kernel_type + > + class kkmeans : public noncopyable + { + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + kkmeans ( + const kcentroid<kernel_type>& kc_ + ): + kc(kc_), + min_change(0.01) + { + set_number_of_centers(1); + } + + ~kkmeans() + { + } + + const kernel_type& get_kernel ( + ) const + { + return kc.get_kernel(); + } + + void set_kcentroid ( + const kcentroid<kernel_type>& kc_ + ) + { + kc = kc_; + set_number_of_centers(number_of_centers()); + } + + const kcentroid<kernel_type>& get_kcentroid ( + unsigned long i + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(i < number_of_centers(), + "\tkcentroid kkmeans::get_kcentroid(i)" + << "\n\tYou have given an invalid value for i" + << "\n\ti: " << i + << "\n\tnumber_of_centers(): " << number_of_centers() + << "\n\tthis: " << this + ); + + return *centers[i]; + } + + void set_number_of_centers ( + unsigned long num + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(num > 0, + "\tvoid kkmeans::set_number_of_centers()" + << "\n\tYou 
can't set the number of centers to zero" + << "\n\tthis: " << this + ); + + centers.set_max_size(num); + centers.set_size(num); + + for (unsigned long i = 0; i < centers.size(); ++i) + { + centers[i].reset(new kcentroid<kernel_type>(kc)); + } + } + + unsigned long number_of_centers ( + ) const + { + return centers.size(); + } + + template <typename T, typename U> + void train ( + const T& samples, + const U& initial_centers, + long max_iter = 1000 + ) + { + do_train(mat(samples),mat(initial_centers),max_iter); + } + + unsigned long operator() ( + const sample_type& sample + ) const + { + unsigned long label = 0; + scalar_type best_score = (*centers[0])(sample); + + // figure out which center the given sample is closest too + for (unsigned long i = 1; i < centers.size(); ++i) + { + scalar_type temp = (*centers[i])(sample); + if (temp < best_score) + { + label = i; + best_score = temp; + } + } + + return label; + } + + void set_min_change ( + scalar_type min_change_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT( 0 <= min_change_ < 1, + "\tvoid kkmeans::set_min_change()" + << "\n\tInvalid arguments to this function" + << "\n\tthis: " << this + << "\n\tmin_change_: " << min_change_ + ); + min_change = min_change_; + } + + const scalar_type get_min_change ( + ) const + { + return min_change; + } + + void swap ( + kkmeans& item + ) + { + centers.swap(item.centers); + kc.swap(item.kc); + assignments.swap(item.assignments); + exchange(min_change, item.min_change); + } + + friend void serialize(const kkmeans& item, std::ostream& out) + { + serialize(item.centers.size(),out); + for (unsigned long i = 0; i < item.centers.size(); ++i) + { + serialize(*item.centers[i], out); + } + serialize(item.kc, out); + serialize(item.min_change, out); + } + + friend void deserialize(kkmeans& item, std::istream& in) + { + unsigned long num; + deserialize(num, in); + item.centers.resize(num); + for (unsigned long i = 0; i < item.centers.size(); ++i) + { + 
std::unique_ptr<kcentroid<kernel_type> > temp(new kcentroid<kernel_type>(kernel_type())); + deserialize(*temp, in); + item.centers[i].swap(temp); + } + + deserialize(item.kc, in); + deserialize(item.min_change, in); + } + + private: + + template <typename matrix_type, typename matrix_type2> + void do_train ( + const matrix_type& samples, + const matrix_type2& initial_centers, + long max_iter = 1000 + ) + { + COMPILE_TIME_ASSERT((is_same_type<typename matrix_type::type, sample_type>::value)); + COMPILE_TIME_ASSERT((is_same_type<typename matrix_type2::type, sample_type>::value)); + + // make sure requires clause is not broken + DLIB_ASSERT(samples.nc() == 1 && initial_centers.nc() == 1 && + initial_centers.nr() == static_cast<long>(number_of_centers()), + "\tvoid kkmeans::train()" + << "\n\tInvalid arguments to this function" + << "\n\tthis: " << this + << "\n\tsamples.nc(): " << samples.nc() + << "\n\tinitial_centers.nc(): " << initial_centers.nc() + << "\n\tinitial_centers.nr(): " << initial_centers.nr() + ); + + // clear out the old data and initialize the centers + for (unsigned long i = 0; i < centers.size(); ++i) + { + centers[i]->clear_dictionary(); + centers[i]->train(initial_centers(i)); + } + + assignments.resize(samples.size()); + + bool assignment_changed = true; + + // loop until the centers stabilize + long count = 0; + const unsigned long min_num_change = static_cast<unsigned long>(min_change*samples.size()); + unsigned long num_changed = min_num_change; + while (assignment_changed && count < max_iter && num_changed >= min_num_change) + { + ++count; + assignment_changed = false; + num_changed = 0; + + // loop over all the samples and assign them to their closest centers + for (long i = 0; i < samples.size(); ++i) + { + // find the best center + unsigned long best_center = 0; + scalar_type best_score = (*centers[0])(samples(i)); + for (unsigned long c = 1; c < centers.size(); ++c) + { + scalar_type temp = (*centers[c])(samples(i)); + if (temp < 
best_score) + { + best_score = temp; + best_center = c; + } + } + + // if the current sample changed centers then make note of that + if (assignments[i] != best_center) + { + assignments[i] = best_center; + assignment_changed = true; + ++num_changed; + } + } + + if (assignment_changed) + { + // now clear out the old data + for (unsigned long i = 0; i < centers.size(); ++i) + centers[i]->clear_dictionary(); + + // recalculate the cluster centers + for (unsigned long i = 0; i < assignments.size(); ++i) + centers[assignments[i]]->train(samples(i)); + } + + } + + + } + + array<std::unique_ptr<kcentroid<kernel_type> > > centers; + kcentroid<kernel_type> kc; + scalar_type min_change; + + // temp variables + array<unsigned long> assignments; + }; + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + void swap(kkmeans<kernel_type>& a, kkmeans<kernel_type>& b) + { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + struct dlib_pick_initial_centers_data + { + dlib_pick_initial_centers_data():idx(0), dist(std::numeric_limits<double>::infinity()){} + long idx; + double dist; + bool operator< (const dlib_pick_initial_centers_data& d) const { return dist < d.dist; } + }; + + template < + typename vector_type1, + typename vector_type2, + typename kernel_type + > + void pick_initial_centers( + long num_centers, + vector_type1& centers, + const vector_type2& samples, + const kernel_type& k, + double percentile = 0.01 + ) + { + /* + This function is basically just a non-randomized version of the kmeans++ algorithm + described in the paper: + kmeans++: The Advantages of Careful Seeding by Arthur and Vassilvitskii + + */ + + + // make sure requires clause is not broken + DLIB_ASSERT(num_centers > 1 && 0 <= percentile && percentile < 1 && samples.size() > 1, + "\tvoid pick_initial_centers()" + << "\n\tYou passed invalid arguments to this function" + 
<< "\n\tnum_centers: " << num_centers + << "\n\tpercentile: " << percentile + << "\n\tsamples.size(): " << samples.size() + ); + + std::vector<dlib_pick_initial_centers_data> scores(samples.size()); + std::vector<dlib_pick_initial_centers_data> scores_sorted(samples.size()); + centers.clear(); + + // pick the first sample as one of the centers + centers.push_back(samples[0]); + + const long best_idx = static_cast<long>(std::max(0.0,samples.size() - samples.size()*percentile - 1)); + + // pick the next center + for (long i = 0; i < num_centers-1; ++i) + { + // Loop over the samples and compare them to the most recent center. Store + // the distance from each sample to its closest center in scores. + const double k_cc = k(centers[i], centers[i]); + for (unsigned long s = 0; s < samples.size(); ++s) + { + // compute the distance between this sample and the current center + const double dist = k_cc + k(samples[s],samples[s]) - 2*k(samples[s], centers[i]); + + if (dist < scores[s].dist) + { + scores[s].dist = dist; + scores[s].idx = s; + } + } + + scores_sorted = scores; + + // now find the winning center and add it to centers. It is the one that is + // far away from all the other centers. 
+ sort(scores_sorted.begin(), scores_sorted.end()); + centers.push_back(samples[scores_sorted[best_idx].idx]); + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type1, + typename vector_type2 + > + void pick_initial_centers( + long num_centers, + vector_type1& centers, + const vector_type2& samples, + double percentile = 0.01 + ) + { + typedef typename vector_type1::value_type sample_type; + linear_kernel<sample_type> kern; + pick_initial_centers(num_centers, centers, samples, kern, percentile); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename array_type, + typename sample_type, + typename alloc + > + void find_clusters_using_kmeans ( + const array_type& samples, + std::vector<sample_type, alloc>& centers, + unsigned long max_iter = 1000 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(samples.size() > 0 && centers.size() > 0, + "\tvoid find_clusters_using_kmeans()" + << "\n\tYou passed invalid arguments to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t centers.size(): " << centers.size() + ); + +#ifdef ENABLE_ASSERTS + { + const long nr = samples[0].nr(); + const long nc = samples[0].nc(); + for (unsigned long i = 0; i < samples.size(); ++i) + { + DLIB_ASSERT(is_vector(samples[i]) && samples[i].nr() == nr && samples[i].nc() == nc, + "\tvoid find_clusters_using_kmeans()" + << "\n\t You passed invalid arguments to this function" + << "\n\t is_vector(samples[i]): " << is_vector(samples[i]) + << "\n\t samples[i].nr(): " << samples[i].nr() + << "\n\t nr: " << nr + << "\n\t samples[i].nc(): " << samples[i].nc() + << "\n\t nc: " << nc + << "\n\t i: " << i + ); + } + } +#endif + + typedef typename sample_type::type scalar_type; + + sample_type zero(centers[0]); + set_all_elements(zero, 0); + + std::vector<unsigned long> center_element_count; + + // tells which center 
a sample belongs to + std::vector<unsigned long> assignments(samples.size(), samples.size()); + + + unsigned long iter = 0; + bool centers_changed = true; + while (centers_changed && iter < max_iter) + { + ++iter; + centers_changed = false; + center_element_count.assign(centers.size(), 0); + + // loop over each sample and see which center it is closest to + for (unsigned long i = 0; i < samples.size(); ++i) + { + // find the best center for sample[i] + scalar_type best_dist = std::numeric_limits<scalar_type>::max(); + unsigned long best_center = 0; + for (unsigned long j = 0; j < centers.size(); ++j) + { + scalar_type dist = length(centers[j] - samples[i]); + if (dist < best_dist) + { + best_dist = dist; + best_center = j; + } + } + + if (assignments[i] != best_center) + { + centers_changed = true; + assignments[i] = best_center; + } + + center_element_count[best_center] += 1; + } + + // now update all the centers + centers.assign(centers.size(), zero); + for (unsigned long i = 0; i < samples.size(); ++i) + { + centers[assignments[i]] += samples[i]; + } + for (unsigned long i = 0; i < centers.size(); ++i) + { + if (center_element_count[i] != 0) + centers[i] /= center_element_count[i]; + } + } + + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename array_type, + typename sample_type, + typename alloc + > + void find_clusters_using_angular_kmeans ( + const array_type& samples, + std::vector<sample_type, alloc>& centers, + unsigned long max_iter = 1000 + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(samples.size() > 0 && centers.size() > 0, + "\tvoid find_clusters_using_angular_kmeans()" + << "\n\tYou passed invalid arguments to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t centers.size(): " << centers.size() + ); + +#ifdef ENABLE_ASSERTS + { + const long nr = samples[0].nr(); + const long nc = samples[0].nc(); + for (unsigned long i = 0; i < 
samples.size(); ++i) + { + DLIB_ASSERT(is_vector(samples[i]) && samples[i].nr() == nr && samples[i].nc() == nc, + "\tvoid find_clusters_using_angular_kmeans()" + << "\n\t You passed invalid arguments to this function" + << "\n\t is_vector(samples[i]): " << is_vector(samples[i]) + << "\n\t samples[i].nr(): " << samples[i].nr() + << "\n\t nr: " << nr + << "\n\t samples[i].nc(): " << samples[i].nc() + << "\n\t nc: " << nc + << "\n\t i: " << i + ); + } + } +#endif + + typedef typename sample_type::type scalar_type; + + sample_type zero(centers[0]); + set_all_elements(zero, 0); + + unsigned long seed = 0; + + // tells which center a sample belongs to + std::vector<unsigned long> assignments(samples.size(), samples.size()); + std::vector<double> lengths; + for (unsigned long i = 0; i < samples.size(); ++i) + { + lengths.push_back(length(samples[i])); + // If there are zero vectors in samples then just say their length is 1 so we + // can avoid a division by zero check later on. Also, this doesn't matter + // since zero vectors can be assigned to any cluster randomly as there is no + // basis for picking one based on angle. + if (lengths.back() == 0) + lengths.back() = 1; + } + + // We will keep the centers as unit vectors at all times throughout the processing. + for (unsigned long i = 0; i < centers.size(); ++i) + { + double len = length(centers[i]); + // Avoid having length 0 centers. If that is the case then pick another center + // at random. 
+ while(len == 0) + { + centers[i] = matrix_cast<scalar_type>(gaussian_randm(centers[i].nr(), centers[i].nc(), seed++)); + len = length(centers[i]); + } + centers[i] /= len; + } + + + unsigned long iter = 0; + bool centers_changed = true; + while (centers_changed && iter < max_iter) + { + ++iter; + centers_changed = false; + + // loop over each sample and see which center it is closest to + for (unsigned long i = 0; i < samples.size(); ++i) + { + // find the best center for sample[i] + scalar_type best_angle = std::numeric_limits<scalar_type>::max(); + unsigned long best_center = 0; + for (unsigned long j = 0; j < centers.size(); ++j) + { + scalar_type angle = -dot(centers[j],samples[i])/lengths[i]; + + if (angle < best_angle) + { + best_angle = angle; + best_center = j; + } + } + + if (assignments[i] != best_center) + { + centers_changed = true; + assignments[i] = best_center; + } + } + + // now update all the centers + centers.assign(centers.size(), zero); + for (unsigned long i = 0; i < samples.size(); ++i) + { + centers[assignments[i]] += samples[i]; + } + // Now length normalize all the centers. + for (unsigned long i = 0; i < centers.size(); ++i) + { + double len = length(centers[i]); + // Avoid having length 0 centers. If that is the case then pick another center + // at random. + while(len == 0) + { + centers[i] = matrix_cast<scalar_type>(gaussian_randm(centers[i].nr(), centers[i].nc(), seed++)); + len = length(centers[i]); + centers_changed = true; + } + centers[i] /= len; + } + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename array_type, + typename EXP + > + unsigned long nearest_center ( + const array_type& centers, + const matrix_exp<EXP>& sample + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(centers.size() > 0 && sample.size() > 0 && is_vector(sample), + "\t unsigned long nearest_center()" + << "\n\t You have given invalid inputs to this function." 
+ << "\n\t centers.size(): " << centers.size() + << "\n\t sample.size(): " << sample.size() + << "\n\t is_vector(sample): " << is_vector(sample) + ); + + double best_dist = length_squared(centers[0] - sample); + unsigned long best_idx = 0; + for (unsigned long i = 1; i < centers.size(); ++i) + { + const double dist = length_squared(centers[i] - sample); + if (dist < best_dist) + { + best_dist = dist; + best_idx = i; + } + } + return best_idx; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KKMEANs_ + + diff --git a/ml/dlib/dlib/svm/kkmeans_abstract.h b/ml/dlib/dlib/svm/kkmeans_abstract.h new file mode 100644 index 000000000..9f9d7ccce --- /dev/null +++ b/ml/dlib/dlib/svm/kkmeans_abstract.h @@ -0,0 +1,365 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_KKMEANs_ABSTRACT_ +#ifdef DLIB_KKMEANs_ABSTRACT_ + +#include <cmath> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include "kernel_abstract.h" +#include "kcentroid_abstract.h" +#include "../noncopyable.h" + +namespace dlib +{ + + template < + typename kernel_type + > + class kkmeans : public noncopyable + { + /*! + REQUIREMENTS ON kernel_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + INITIAL VALUE + - number_of_centers() == 1 + - get_min_change() == 0.01 + + WHAT THIS OBJECT REPRESENTS + This is an implementation of a kernelized k-means clustering algorithm. + It performs k-means clustering by using the kcentroid object. + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + kkmeans ( + const kcentroid<kernel_type>& kc_ + ); + /*! 
+ ensures + - #number_of_centers() == 1 + - #get_min_change() == 0.01 + - #get_kcentroid(0) == a copy of kc_ + !*/ + + ~kkmeans( + ); + /*! + ensures + - all resources associated with *this have been released + !*/ + + void set_kcentroid ( + const kcentroid<kernel_type>& kc_ + ); + /*! + ensures + - for all idx: + - #get_kcentroid(idx) == a copy of kc_ + !*/ + + const kcentroid<kernel_type>& get_kcentroid ( + unsigned long i + ) const; + /*! + requires + - i < number_of_centers() + ensures + - returns a const reference to the ith kcentroid object contained in + this object. Each kcentroid represents one of the centers found + by the k-means clustering algorithm. + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a const reference to the kernel used by this object + !*/ + + void set_number_of_centers ( + unsigned long num + ); + /*! + requires + - num > 0 + ensures + - #number_of_centers() == num + !*/ + + unsigned long number_of_centers ( + ) const; + /*! + ensures + - returns the number of centers used in this instance of the k-means clustering + algorithm. + !*/ + + template < + typename matrix_type, + typename matrix_type2 + > + void train ( + const matrix_type& samples, + const matrix_type2& initial_centers, + long max_iter = 1000 + ); + /*! + requires + - matrix_type and matrix_type2 must either be dlib::matrix objects or convertible to dlib::matrix + via mat() + - matrix_type::type == sample_type (i.e. matrix_type should contain sample_type objects) + - matrix_type2::type == sample_type (i.e. matrix_type2 should contain sample_type objects) + - initial_centers.nc() == 1 (i.e. must be a column vector) + - samples.nc() == 1 (i.e. must be a column vector) + - initial_centers.nr() == number_of_centers() + ensures + - performs k-means clustering of the given set of samples. The initial center points + are taken from the initial_centers argument. 
+ - loops over the data and continues to refine the clustering until either less than + get_min_change() fraction of the data points change clusters or we have done max_iter + iterations over the data. + - After this function finishes you can call the operator() function below + to determine which centroid a given sample is closest to. + !*/ + + unsigned long operator() ( + const sample_type& sample + ) const; + /*! + ensures + - returns a number idx such that: + - idx < number_of_centers() + - get_kcentroid(idx) == the centroid that is closest to the given + sample. + !*/ + + void set_min_change ( + scalar_type min_change + ); + /*! + requires + - 0 <= min_change < 1 + ensures + - #get_min_change() == min_change + !*/ + + const scalar_type get_min_change ( + ) const; + /*! + ensures + - returns the minimum fraction of data points that need to change + centers in an iteration of kmeans for the algorithm to keep going. + !*/ + + void swap ( + kkmeans& item + ); + /*! + ensures + - swaps *this and item + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type + > + void swap( + kkmeans<kernel_type>& a, + kkmeans<kernel_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename kernel_type + > + void serialize ( + const kkmeans<kernel_type>& item, + std::ostream& out + ); + /*! + provides serialization support for kkmeans objects + !*/ + + template < + typename kernel_type + > + void deserialize ( + kkmeans<kernel_type>& item, + std::istream& in + ); + /*! 
+ provides serialization support for kkmeans objects + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type1, + typename vector_type2, + typename kernel_type + > + void pick_initial_centers( + long num_centers, + vector_type1& centers, + const vector_type2& samples, + const kernel_type& k, + double percentile = 0.01 + ); + /*! + requires + - num_centers > 1 + - 0 <= percentile < 1 + - samples.size() > 1 + - vector_type1 == something with an interface compatible with std::vector + - vector_type2 == something with an interface compatible with std::vector + - k(samples[0],samples[0]) must be a valid expression that returns a double + - both centers and samples must be able to contain kernel_type::sample_type + objects + ensures + - finds num_centers candidate cluster centers in the data in the samples + vector. Assumes that k is the kernel that will be used during clustering + to define the space in which clustering occurs. + - The centers are found by looking for points that are far away from other + candidate centers. However, if the data is noisy you probably want to + ignore the farthest way points since they will be outliers. To do this + set percentile to the fraction of outliers you expect the data to contain. + - #centers.size() == num_centers + - #centers == a vector containing the candidate centers found + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type1, + typename vector_type2 + > + void pick_initial_centers( + long num_centers, + vector_type1& centers, + const vector_type2& samples, + double percentile = 0.01 + ); + /*! 
+ requires + - num_centers > 1 + - 0 <= percentile < 1 + - samples.size() > 1 + - vector_type1 == something with an interface compatible with std::vector + - vector_type2 == something with an interface compatible with std::vector + - Both centers and samples must be able to contain dlib::matrix based row or + column vectors. + ensures + - performs: pick_initial_centers(num_centers, centers, samples, linear_kernel<sample_type>(), percentile) + (i.e. this function is simply an overload that uses the linear kernel. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename array_type, + typename sample_type, + typename alloc + > + void find_clusters_using_kmeans ( + const array_type& samples, + std::vector<sample_type, alloc>& centers, + unsigned long max_iter = 1000 + ); + /*! + requires + - samples.size() > 0 + - samples == a bunch of row or column vectors and they all must be of the + same length. + - centers.size() > 0 + - array_type == something with an interface compatible with std::vector + and it must contain row or column vectors capable of being stored in + sample_type objects. + - sample_type == a dlib::matrix capable of representing vectors + ensures + - performs regular old linear kmeans clustering on the samples. The clustering + begins with the initial set of centers given as an argument to this function. + When it finishes #centers will contain the resulting centers. + - no more than max_iter iterations will be performed before this function + terminates. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename array_type, + typename sample_type, + typename alloc + > + void find_clusters_using_angular_kmeans ( + const array_type& samples, + std::vector<sample_type, alloc>& centers, + unsigned long max_iter = 1000 + ); + /*! 
+ requires + - samples.size() > 0 + - samples == a bunch of row or column vectors and they all must be of the + same length. + - centers.size() > 0 + - array_type == something with an interface compatible with std::vector + and it must contain row or column vectors capable of being stored in + sample_type objects. + - sample_type == a dlib::matrix capable of representing vectors + ensures + - performs linear kmeans clustering on the samples, except instead of using + Euclidean distance to compare samples to the centers it uses the angle + between a sample and a center (with respect to the origin). So we try to + cluster samples together if they have small angles with respect to each + other. The clustering begins with the initial set of centers given as an + argument to this function. When it finishes #centers will contain the + resulting centers. + - for all valid i: + - length(#centers[i]) == 1 + (i.e. the output centers are scaled to be unit vectors since their + magnitude is irrelevant. Moreover, this makes it so you can use + functions like nearest_center() with #centers to find the cluster + assignments.) + - No more than max_iter iterations will be performed before this function + terminates. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename array_type, + typename EXP + > + unsigned long nearest_center ( + const array_type& centers, + const matrix_exp<EXP>& sample + ); + /*! + requires + - centers.size() > 0 + - sample.size() > 0 + - is_vector(sample) == true + - centers must be an array of vectors such that the following expression is + valid: length_squared(sample - centers[0]). (e.g. centers could be a + std::vector of matrix objects holding column vectors). + ensures + - returns the index that identifies the element of centers that is nearest to + sample. That is, returns a number IDX such that centers[IDX] is the element + of centers that minimizes length(centers[IDX]-sample). 
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KKMEANs_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/krls.h b/ml/dlib/dlib/svm/krls.h new file mode 100644 index 000000000..6c72e45e8 --- /dev/null +++ b/ml/dlib/dlib/svm/krls.h @@ -0,0 +1,358 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_KRLs_ +#define DLIB_KRLs_ + +#include <vector> + +#include "krls_abstract.h" +#include "../matrix.h" +#include "function.h" +#include "../std_allocator.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + class krls + { + /*! + This is an implementation of the kernel recursive least squares algorithm described in the paper: + The Kernel Recursive Least Squares Algorithm by Yaakov Engel. + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + + explicit krls ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000 + ) : + kernel(kernel_), + my_tolerance(tolerance_), + my_max_dictionary_size(max_dictionary_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(tolerance_ >= 0, + "\tkrls::krls()" + << "\n\t You have to give a positive tolerance" + << "\n\t this: " << this + << "\n\t tolerance: " << tolerance_ + ); + + clear_dictionary(); + } + + scalar_type tolerance() const + { + return my_tolerance; + } + + unsigned long max_dictionary_size() const + { + return my_max_dictionary_size; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel; + } + + void clear_dictionary () + { + dictionary.clear(); + alpha.clear(); + + K_inv.set_size(0,0); + K.set_size(0,0); + P.set_size(0,0); + } + + 
scalar_type operator() ( + const sample_type& x + ) const + { + scalar_type temp = 0; + for (unsigned long i = 0; i < alpha.size(); ++i) + temp += alpha[i]*kern(dictionary[i], x); + + return temp; + } + + void train ( + const sample_type& x, + scalar_type y + ) + { + const scalar_type kx = kern(x,x); + if (alpha.size() == 0) + { + // just ignore this sample if it is the zero vector (or really close to being zero) + if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) + { + // set initial state since this is the first training example we have seen + + K_inv.set_size(1,1); + K_inv(0,0) = 1/kx; + K.set_size(1,1); + K(0,0) = kx; + + alpha.push_back(y/kx); + dictionary.push_back(x); + P.set_size(1,1); + P(0,0) = 1; + } + } + else + { + // fill in k + k.set_size(alpha.size()); + for (long r = 0; r < k.nr(); ++r) + k(r) = kern(x,dictionary[r]); + + // compute the error we would have if we approximated the new x sample + // with the dictionary. That is, do the ALD test from the KRLS paper. + a = K_inv*k; + scalar_type delta = kx - trans(k)*a; + + // if this new vector isn't approximately linearly dependent on the vectors + // in our dictionary. + if (delta > my_tolerance) + { + if (dictionary.size() >= my_max_dictionary_size) + { + // We need to remove one of the old members of the dictionary before + // we proceed with adding a new one. So remove the oldest one. 
+ remove_dictionary_vector(0); + + // recompute these guys since they were computed with the old + // kernel matrix + k = remove_row(k,0); + a = K_inv*k; + delta = kx - trans(k)*a; + } + + // add x to the dictionary + dictionary.push_back(x); + + // update K_inv by computing the new one in the temp matrix (equation 3.14) + matrix<scalar_type,0,0,mem_manager_type> temp(K_inv.nr()+1, K_inv.nc()+1); + // update the middle part of the matrix + set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; + // update the right column of the matrix + set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; + // update the bottom row of the matrix + set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); + // update the bottom right corner of the matrix + temp(K_inv.nr(), K_inv.nc()) = 1/delta; + // put temp into K_inv + temp.swap(K_inv); + + + + + // update K (the kernel matrix) + temp.set_size(K.nr()+1, K.nc()+1); + set_subm(temp, get_rect(K)) = K; + // update the right column of the matrix + set_subm(temp, 0, K.nr(),K.nr(),1) = k; + // update the bottom row of the matrix + set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); + temp(K.nr(), K.nc()) = kx; + // put temp into K + temp.swap(K); + + + + + // Now update the P matrix (equation 3.15) + temp.set_size(P.nr()+1, P.nc()+1); + set_subm(temp, get_rect(P)) = P; + // initialize the new sides of P + set_rowm(temp,P.nr()) = 0; + set_colm(temp,P.nr()) = 0; + temp(P.nr(), P.nc()) = 1; + temp.swap(P); + + // now update the alpha vector (equation 3.16) + const scalar_type k_a = (y-trans(k)*mat(alpha))/delta; + for (unsigned long i = 0; i < alpha.size(); ++i) + { + alpha[i] -= a(i)*k_a; + } + alpha.push_back(k_a); + } + else + { + q = P*a/(1+trans(a)*P*a); + + // update P (equation 3.12) + temp_matrix = trans(a)*P; + P -= q*temp_matrix; + + // update the alpha vector (equation 3.13) + const scalar_type k_a = y-trans(k)*mat(alpha); + for (unsigned long i = 0; i < alpha.size(); ++i) + { + alpha[i] += (K_inv*q*k_a)(i); + } + } + 
} + } + + void swap ( + krls& item + ) + { + exchange(kernel, item.kernel); + dictionary.swap(item.dictionary); + alpha.swap(item.alpha); + K_inv.swap(item.K_inv); + K.swap(item.K); + P.swap(item.P); + exchange(my_tolerance, item.my_tolerance); + q.swap(item.q); + a.swap(item.a); + k.swap(item.k); + temp_matrix.swap(item.temp_matrix); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + } + + unsigned long dictionary_size ( + ) const { return dictionary.size(); } + + decision_function<kernel_type> get_decision_function ( + ) const + { + return decision_function<kernel_type>( + mat(alpha), + -sum(mat(alpha))*tau, + kernel, + mat(dictionary) + ); + } + + friend void serialize(const krls& item, std::ostream& out) + { + serialize(item.kernel, out); + serialize(item.dictionary, out); + serialize(item.alpha, out); + serialize(item.K_inv, out); + serialize(item.K, out); + serialize(item.P, out); + serialize(item.my_tolerance, out); + serialize(item.my_max_dictionary_size, out); + } + + friend void deserialize(krls& item, std::istream& in) + { + deserialize(item.kernel, in); + deserialize(item.dictionary, in); + deserialize(item.alpha, in); + deserialize(item.K_inv, in); + deserialize(item.K, in); + deserialize(item.P, in); + deserialize(item.my_tolerance, in); + deserialize(item.my_max_dictionary_size, in); + } + + private: + + inline scalar_type kern (const sample_type& m1, const sample_type& m2) const + { + return kernel(m1,m2) + tau; + } + + void remove_dictionary_vector ( + long i + ) + /*! + requires + - 0 <= i < dictionary.size() + ensures + - #dictionary.size() == dictionary.size() - 1 + - #alpha.size() == alpha.size() - 1 + - updates the K_inv matrix so that it is still a proper inverse of the + kernel matrix + - also removes the necessary row and column from the K matrix + - uses the this->a variable so after this function runs that variable + will contain a different value. 
+ !*/ + { + // remove the dictionary vector + dictionary.erase(dictionary.begin()+i); + + // remove the i'th vector from the inverse kernel matrix. This formula is basically + // just the reverse of the way K_inv is updated by equation 3.14 during normal training. + K_inv = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); + + // now compute the updated alpha values to take account that we just removed one of + // our dictionary vectors + a = (K_inv*remove_row(K,i)*mat(alpha)); + + // now copy over the new alpha values + alpha.resize(alpha.size()-1); + for (unsigned long k = 0; k < alpha.size(); ++k) + { + alpha[k] = a(k); + } + + // update the P matrix as well + P = removerc(P,i,i); + + // update the K matrix as well + K = removerc(K,i,i); + } + + + kernel_type kernel; + + typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; + typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; + typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; + typedef std::vector<scalar_type,alloc_scalar_type> alpha_vector_type; + + dictionary_vector_type dictionary; + alpha_vector_type alpha; + + matrix<scalar_type,0,0,mem_manager_type> K_inv; + matrix<scalar_type,0,0,mem_manager_type> K; + matrix<scalar_type,0,0,mem_manager_type> P; + + scalar_type my_tolerance; + unsigned long my_max_dictionary_size; + + + // temp variables here just so we don't have to reconstruct them over and over. Thus, + // they aren't really part of the state of this object. 
+ matrix<scalar_type,0,1,mem_manager_type> q; + matrix<scalar_type,0,1,mem_manager_type> a; + matrix<scalar_type,0,1,mem_manager_type> k; + matrix<scalar_type,1,0,mem_manager_type> temp_matrix; + + const static scalar_type tau; + + }; + + template <typename kernel_type> + const typename kernel_type::scalar_type krls<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.01); + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + void swap(krls<kernel_type>& a, krls<kernel_type>& b) + { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KRLs_ + diff --git a/ml/dlib/dlib/svm/krls_abstract.h b/ml/dlib/dlib/svm/krls_abstract.h new file mode 100644 index 000000000..7ea2d9872 --- /dev/null +++ b/ml/dlib/dlib/svm/krls_abstract.h @@ -0,0 +1,202 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_KRLs_ABSTRACT_ +#ifdef DLIB_KRLs_ABSTRACT_ + +#include <cmath> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include "kernel_abstract.h" + +namespace dlib +{ + + template < + typename kernel_type + > + class krls + { + /*! + REQUIREMENTS ON kernel_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + INITIAL VALUE + - dictionary_size() == 0 + + WHAT THIS OBJECT REPRESENTS + This is an implementation of the kernel recursive least squares algorithm + described in the paper: + The Kernel Recursive Least Squares Algorithm by Yaakov Engel. + + The long and short of this algorithm is that it is an online kernel based + regression algorithm. You give it samples (x,y) and it learns the function + f(x) == y. For a detailed description of the algorithm read the above paper. 
+ + Also note that the algorithm internally keeps a set of "dictionary vectors" + that are used to represent the regression function. You can force the + algorithm to use no more than a set number of vectors by setting + the 3rd constructor argument to whatever you want. However, note that + doing this causes the algorithm to bias it's results towards more + recent training examples. + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + + explicit krls ( + const kernel_type& kernel_, + scalar_type tolerance_ = 0.001, + unsigned long max_dictionary_size_ = 1000000 + ); + /*! + requires + - tolerance >= 0 + ensures + - this object is properly initialized + - #tolerance() == tolerance_ + - #get_decision_function().kernel_function == kernel_ + (i.e. this object will use the given kernel function) + - #get_kernel() == kernel_ + - #max_dictionary_size() == max_dictionary_size_ + !*/ + + scalar_type tolerance( + ) const; + /*! + ensures + - returns the tolerance to use for the approximately linearly dependent + test in the KRLS algorithm. This is a number which governs how + accurately this object will approximate the decision function it is + learning. Smaller values generally result in a more accurate + estimate while also resulting in a bigger set of dictionary vectors in + the learned decision function. Bigger tolerances values result in a + less accurate decision function but also in less dictionary vectors. + - The exact meaning of the tolerance parameter is the following: + Imagine that we have an empirical_kernel_map that contains all + the current dictionary vectors. Then the tolerance is the minimum + projection error (as given by empirical_kernel_map::project()) required + to cause us to include a new vector in the dictionary. 
So each time + you call train() the krls object basically just computes the projection + error for that new sample and if it is larger than the tolerance + then that new sample becomes part of the dictionary. + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a const reference to the kernel used by this object + !*/ + + unsigned long max_dictionary_size( + ) const; + /*! + ensures + - returns the maximum number of dictionary vectors this object + will use at a time. That is, dictionary_size() will never be + greater than max_dictionary_size(). + !*/ + + void clear_dictionary ( + ); + /*! + ensures + - clears out all learned data + (e.g. #get_decision_function().basis_vectors.size() == 0) + !*/ + + scalar_type operator() ( + const sample_type& x + ) const; + /*! + ensures + - returns the current y estimate for the given x + !*/ + + void train ( + const sample_type& x, + scalar_type y + ); + /*! + ensures + - trains this object that the given x should be mapped to the given y + - if (dictionary_size() == max_dictionary_size() and training + would add another dictionary vector to this object) then + - discards the oldest dictionary vector so that we can still + add a new one and remain below the max number of dictionary + vectors. + !*/ + + void swap ( + krls& item + ); + /*! + ensures + - swaps *this with item + !*/ + + unsigned long dictionary_size ( + ) const; + /*! + ensures + - returns the number of vectors in the dictionary. That is, + returns a number equal to get_decision_function().basis_vectors.size() + !*/ + + decision_function<kernel_type> get_decision_function ( + ) const; + /*! + ensures + - returns a decision function F that represents the function learned + by this object so far. I.e. 
it is the case that: + - for all x: F(x) == (*this)(x) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type + > + void swap( + krls<kernel_type>& a, + krls<kernel_type>& b + ) + { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename kernel_type + > + void serialize ( + const krls<kernel_type>& item, + std::ostream& out + ); + /*! + provides serialization support for krls objects + !*/ + + template < + typename kernel_type + > + void deserialize ( + krls<kernel_type>& item, + std::istream& in + ); + /*! + provides serialization support for krls objects + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_KRLs_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/krr_trainer.h b/ml/dlib/dlib/svm/krr_trainer.h new file mode 100644 index 000000000..a43431169 --- /dev/null +++ b/ml/dlib/dlib/svm/krr_trainer.h @@ -0,0 +1,368 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_KRR_TRAInER_Hh_ +#define DLIB_KRR_TRAInER_Hh_ + +#include "../algs.h" +#include "function.h" +#include "kernel.h" +#include "empirical_kernel_map.h" +#include "linearly_independent_subset_finder.h" +#include "../statistics.h" +#include "rr_trainer.h" +#include "krr_trainer_abstract.h" +#include <vector> +#include <iostream> + +namespace dlib +{ + template < + typename K + > + class krr_trainer + { + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + krr_trainer ( + ) : + verbose(false), + max_basis_size(400), + ekm_stale(true) + { + } + + void be_verbose ( + ) + { + verbose = true; + trainer.be_verbose(); + } + + void be_quiet ( + ) + { + verbose = false; + trainer.be_quiet(); + } + + void use_regression_loss_for_loo_cv ( + ) + { + trainer.use_regression_loss_for_loo_cv(); + } + + void use_classification_loss_for_loo_cv ( + ) + { + trainer.use_classification_loss_for_loo_cv(); + } + + bool will_use_regression_loss_for_loo_cv ( + ) const + { + return trainer.will_use_regression_loss_for_loo_cv(); + } + + const kernel_type get_kernel ( + ) const + { + return kern; + } + + void set_kernel ( + const kernel_type& k + ) + { + kern = k; + } + + template <typename T> + void set_basis ( + const T& basis_samples + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(basis_samples.size() > 0 && is_vector(mat(basis_samples)), + "\tvoid krr_trainer::set_basis(basis_samples)" + << "\n\t You have to give a non-empty set of basis_samples and it must be a vector" + << "\n\t basis_samples.size(): " << basis_samples.size() + << "\n\t is_vector(mat(basis_samples)): " << is_vector(mat(basis_samples)) + << "\n\t this: " << this + ); + + basis = mat(basis_samples); + ekm_stale = true; + } + + bool basis_loaded ( + ) const + { + 
return (basis.size() != 0); + } + + void clear_basis ( + ) + { + basis.set_size(0); + ekm.clear(); + ekm_stale = true; + } + + unsigned long get_max_basis_size ( + ) const + { + return max_basis_size; + } + + void set_max_basis_size ( + unsigned long max_basis_size_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(max_basis_size_ > 0, + "\t void krr_trainer::set_max_basis_size()" + << "\n\t max_basis_size_ must be greater than 0" + << "\n\t max_basis_size_: " << max_basis_size_ + << "\n\t this: " << this + ); + + max_basis_size = max_basis_size_; + } + + void set_lambda ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(lambda_ >= 0, + "\t void krr_trainer::set_lambda()" + << "\n\t lambda must be greater than or equal to 0" + << "\n\t lambda_: " << lambda_ + << "\n\t this: " << this + ); + + trainer.set_lambda(lambda_); + } + + const scalar_type get_lambda ( + ) const + { + return trainer.get_lambda(); + } + + template <typename EXP> + void set_search_lambdas ( + const matrix_exp<EXP>& lambdas + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0, + "\t void krr_trainer::set_search_lambdas()" + << "\n\t lambdas must be a non-empty vector of values" + << "\n\t is_vector(lambdas): " << is_vector(lambdas) + << "\n\t lambdas.size(): " << lambdas.size() + << "\n\t min(lambdas): " << min(lambdas) + << "\n\t this: " << this + ); + + trainer.set_search_lambdas(lambdas); + } + + const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( + ) const + { + return trainer.get_search_lambdas(); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + std::vector<scalar_type> temp; + scalar_type temp2; + return do_train(mat(x), mat(y), false, temp, temp2); + } + + template < + 
typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + std::vector<scalar_type>& loo_values + ) const + { + scalar_type temp; + return do_train(mat(x), mat(y), true, loo_values, temp); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + std::vector<scalar_type>& loo_values, + scalar_type& lambda_used + ) const + { + return do_train(mat(x), mat(y), true, loo_values, lambda_used); + } + + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + const bool output_loo_values, + std::vector<scalar_type>& loo_values, + scalar_type& the_lambda + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y), + "\t decision_function krr_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t is_vector(x): " << is_vector(x) + << "\n\t is_vector(y): " << is_vector(y) + << "\n\t x.size(): " << x.size() + << "\n\t y.size(): " << y.size() + ); + +#ifdef ENABLE_ASSERTS + if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y), + "\t decision_function krr_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + ); + } +#endif + + // The first thing we do is make sure we have an appropriate ekm ready for use below. 
+ if (basis_loaded()) + { + if (ekm_stale) + { + ekm.load(kern, basis); + ekm_stale = false; + } + } + else + { + linearly_independent_subset_finder<kernel_type> lisf(kern, max_basis_size); + fill_lisf(lisf, x); + ekm.load(lisf); + } + + if (verbose) + { + std::cout << "\nNumber of basis vectors used: " << ekm.out_vector_size() << std::endl; + } + + typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type; + + running_stats<scalar_type> rs; + + // Now we project all the x samples into kernel space using our EKM + matrix<column_matrix_type,0,1,mem_manager_type > proj_x; + proj_x.set_size(x.size()); + for (long i = 0; i < proj_x.size(); ++i) + { + scalar_type err; + // Note that we also append a 1 to the end of the vectors because this is + // a convenient way of dealing with the bias term later on. + if (verbose == false) + { + proj_x(i) = ekm.project(x(i)); + } + else + { + proj_x(i) = ekm.project(x(i),err); + rs.add(err); + } + } + + if (verbose) + { + std::cout << "Mean EKM projection error: " << rs.mean() << std::endl; + std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl; + } + + + decision_function<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > lin_df; + + if (output_loo_values) + lin_df = trainer.train(proj_x,y, loo_values, the_lambda); + else + lin_df = trainer.train(proj_x,y); + + // convert the linear decision function into a kernelized one. + decision_function<kernel_type> df; + df = ekm.convert_to_decision_function(lin_df.basis_vectors(0)); + df.b = lin_df.b; + + // If we used an automatically derived basis then there isn't any point in + // keeping the ekm around. So free its memory. + if (basis_loaded() == false) + { + ekm.clear(); + } + + return df; + } + + + /*! 
+ CONVENTION + - if (ekm_stale) then + - kern or basis have changed since the last time + they were loaded into the ekm + + - get_lambda() == trainer.get_lambda() + - get_kernel() == kern + - get_max_basis_size() == max_basis_size + - will_use_regression_loss_for_loo_cv() == trainer.will_use_regression_loss_for_loo_cv() + - get_search_lambdas() == trainer.get_search_lambdas() + + - basis_loaded() == (basis.size() != 0) + !*/ + + rr_trainer<linear_kernel<matrix<scalar_type,0,0,mem_manager_type> > > trainer; + + bool verbose; + + + kernel_type kern; + unsigned long max_basis_size; + + matrix<sample_type,0,1,mem_manager_type> basis; + mutable empirical_kernel_map<kernel_type> ekm; + mutable bool ekm_stale; + + }; + +} + +#endif // DLIB_KRR_TRAInER_Hh_ + + diff --git a/ml/dlib/dlib/svm/krr_trainer_abstract.h b/ml/dlib/dlib/svm/krr_trainer_abstract.h new file mode 100644 index 000000000..399802f6b --- /dev/null +++ b/ml/dlib/dlib/svm/krr_trainer_abstract.h @@ -0,0 +1,322 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_KRR_TRAInER_ABSTRACT_Hh_ +#ifdef DLIB_KRR_TRAInER_ABSTRACT_Hh_ + +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "empirical_kernel_map_abstract.h" + +namespace dlib +{ + template < + typename K + > + class krr_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + INITIAL VALUE + - get_lambda() == 0 + - basis_loaded() == false + - get_max_basis_size() == 400 + - will_use_regression_loss_for_loo_cv() == true + - get_search_lambdas() == logspace(-9, 2, 50) + - this object will not be verbose unless be_verbose() is called + + WHAT THIS OBJECT REPRESENTS + This object represents a tool for performing kernel ridge regression + (This basic algorithm is also known my many other names, e.g. regularized + least squares or least squares SVM). 
+ + The exact definition of what this algorithm does is this: + Find w and b that minimizes the following (x_i are input samples and y_i are target values): + lambda*dot(w,w) + sum_over_i( (f(x_i) - y_i)^2 ) + where f(x) == dot(x,w) - b + + Except the dot products are replaced by kernel functions. So this + algorithm is just regular old least squares regression but with the + addition of a regularization term which encourages small w and the + application of the kernel trick. + + + It is implemented using the empirical_kernel_map and thus allows you + to run the algorithm on large datasets and obtain sparse outputs. It is also + capable of estimating the lambda parameter using leave-one-out cross-validation. + + + The leave-one-out cross-validation implementation is based on the techniques + discussed in this paper: + Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert. + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + krr_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + template <typename T> + void set_basis ( + const T& basis_samples + ); + /*! + requires + - T must be a dlib::matrix type or something convertible to a matrix via mat() + (e.g. 
a std::vector) + - is_vector(basis_samples) == true + - basis_samples.size() > 0 + - get_kernel() must be capable of operating on the elements of basis_samples. That is, + expressions such as get_kernel()(basis_samples(0), basis_samples(0)) should make sense. + ensures + - #basis_loaded() == true + - training will be carried out in the span of the given basis_samples + !*/ + + bool basis_loaded ( + ) const; + /*! + ensures + - returns true if this object has been loaded with user supplied basis vectors and false otherwise. + !*/ + + void clear_basis ( + ); + /*! + ensures + - #basis_loaded() == false + !*/ + + unsigned long get_max_basis_size ( + ) const; + /*! + ensures + - returns the maximum number of basis vectors this object is allowed + to use. This parameter only matters when the user has not supplied + a basis via set_basis(). + !*/ + + void set_max_basis_size ( + unsigned long max_basis_size + ); + /*! + requires + - max_basis_size > 0 + ensures + - #get_max_basis_size() == max_basis_size + !*/ + + void set_lambda ( + scalar_type lambda + ); + /*! + requires + - lambda >= 0 + ensures + - #get_lambda() == lambda + !*/ + + const scalar_type get_lambda ( + ) const; + /*! + ensures + - returns the regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data + exactly or allowing more errors but hopefully improving the + generalization ability of the resulting function. Smaller values + encourage exact fitting while larger values of lambda may encourage + better generalization. + + Note that a lambda of 0 has a special meaning. It indicates to this + object that it should automatically determine an appropriate lambda + value. This is done using leave-one-out cross-validation. + !*/ + + void use_regression_loss_for_loo_cv ( + ); + /*! + ensures + - #will_use_regression_loss_for_loo_cv() == true + !*/ + + void use_classification_loss_for_loo_cv ( + ); + /*! 
+ ensures + - #will_use_regression_loss_for_loo_cv() == false + !*/ + + bool will_use_regression_loss_for_loo_cv ( + ) const; + /*! + ensures + - returns true if the automatic lambda estimation will attempt to estimate a lambda + appropriate for a regression task. Otherwise it will try and find one which + minimizes the number of classification errors. + !*/ + + template <typename EXP> + void set_search_lambdas ( + const matrix_exp<EXP>& lambdas + ); + /*! + requires + - is_vector(lambdas) == true + - lambdas.size() > 0 + - min(lambdas) > 0 + - lambdas must contain floating point numbers + ensures + - #get_search_lambdas() == lambdas + !*/ + + const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( + ) const; + /*! + ensures + - returns a matrix M such that: + - is_vector(M) == true + - M == a list of all the lambda values which will be tried when performing + LOO cross-validation for determining the best lambda. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + - is_learning_problem(x,y) == true + - if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) then + - is_binary_classification_problem(x,y) == true + (i.e. if you want this algorithm to estimate a lambda appropriate for + classification functions then you had better give a valid classification + problem) + ensures + - performs kernel ridge regression given the training samples in x and target values in y. 
+ - returns a decision_function F with the following properties: + - F(new_x) == predicted y value + + - if (basis_loaded()) then + - training will be carried out in the span of the user supplied basis vectors + - else + - this object will attempt to automatically select an appropriate basis + + - if (get_lambda() == 0) then + - This object will perform internal leave-one-out cross-validation to determine an + appropriate lambda automatically. It will compute the LOO error for each lambda + in get_search_lambdas() and select the best one. + - if (will_use_regression_loss_for_loo_cv()) then + - the lambda selected will be the one that minimizes the mean squared error. + - else + - the lambda selected will be the one that minimizes the number of classification + mistakes. We say a point is classified correctly if the output of the + decision_function has the same sign as its label. + - #get_lambda() == 0 + (i.e. we don't change the get_lambda() value. If you want to know what the + automatically selected lambda value was then call the version of train() + defined below) + - else + - The user supplied value of get_lambda() will be used to perform the kernel + ridge regression. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + std::vector<scalar_type>& loo_values + ) const; + /*! + requires + - all the requirements for train(x,y) must be satisfied + ensures + - returns train(x,y) + (i.e. executes train(x,y) and returns its result) + - #loo_values.size() == y.size() + - for all valid i: + - #loo_values[i] == leave-one-out prediction for the value of y(i) based + on all the training samples other than (x(i),y(i)). 
+ !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + std::vector<scalar_type>& loo_values, + scalar_type& lambda_used + ) const; + /*! + requires + - all the requirements for train(x,y) must be satisfied + ensures + - returns train(x,y) + (i.e. executes train(x,y) and returns its result) + - #loo_values.size() == y.size() + - for all valid i: + - #loo_values[i] == leave-one-out prediction for the value of y(i) based + on all the training samples other than (x(i),y(i)). + - #lambda_used == the value of lambda used to generate the + decision_function. Note that this lambda value is always + equal to get_lambda() if get_lambda() isn't 0. + !*/ + + }; + +} + +#endif // DLIB_KRR_TRAInER_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/linearly_independent_subset_finder.h b/ml/dlib/dlib/svm/linearly_independent_subset_finder.h new file mode 100644 index 000000000..3bac0df2c --- /dev/null +++ b/ml/dlib/dlib/svm/linearly_independent_subset_finder.h @@ -0,0 +1,540 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_LISfh_ +#define DLIB_LISfh_ + +#include <vector> + +#include "linearly_independent_subset_finder_abstract.h" +#include "../matrix.h" +#include "function.h" +#include "../std_allocator.h" +#include "../algs.h" +#include "../serialize.h" +#include "../is_kind.h" +#include "../string.h" +#include "../rand.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + class linearly_independent_subset_finder + { + /*! 
+ INITIAL VALUE + - min_strength == 0 + - min_vect_idx == 0 + - K_inv.size() == 0 + - K.size() == 0 + - dictionary.size() == 0 + + CONVENTION + - max_dictionary_size() == my_max_dictionary_size + - get_kernel() == kernel + - minimum_tolerance() == min_tolerance + - size() == dictionary.size() + - get_dictionary() == mat(dictionary) + - K.nr() == dictionary.size() + - K.nc() == dictionary.size() + - for all valid r,c: + - K(r,c) == kernel(dictionary[r], dictionary[c]) + - K_inv == inv(K) + + - if (dictionary.size() == my_max_dictionary_size) then + - for all valid 0 < i < dictionary.size(): + - Let STRENGTHS[i] == the delta you would get for dictionary[i] (i.e. Approximately + Linearly Dependent value) if you removed dictionary[i] from this object and then + tried to add it back in. + - min_strength == the minimum value from STRENGTHS + - min_vect_idx == the index of the element in STRENGTHS with the smallest value + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::sample_type type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + linearly_independent_subset_finder ( + ) : + my_max_dictionary_size(100), + min_tolerance(0.001) + { + clear_dictionary(); + } + + linearly_independent_subset_finder ( + const kernel_type& kernel_, + unsigned long max_dictionary_size_, + scalar_type min_tolerance_ = 0.001 + ) : + kernel(kernel_), + my_max_dictionary_size(max_dictionary_size_), + min_tolerance(min_tolerance_) + { + // make sure requires clause is not broken + DLIB_ASSERT(min_tolerance_ > 0 && max_dictionary_size_ > 1, + "\tlinearly_independent_subset_finder()" + << "\n\tinvalid argument to constructor" + << "\n\tmin_tolerance_: " << min_tolerance_ + << "\n\tmax_dictionary_size_: " << max_dictionary_size_ + << "\n\tthis: " << this + ); + clear_dictionary(); + } + + unsigned long max_dictionary_size() const + { + return my_max_dictionary_size; 
+ } + + const kernel_type& get_kernel ( + ) const + { + return kernel; + } + + scalar_type minimum_tolerance( + ) const + { + return min_tolerance; + } + + void set_minimum_tolerance ( + scalar_type min_tol + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(min_tol > 0, + "\tlinearly_independent_subset_finder::set_minimum_tolerance()" + << "\n\tinvalid argument to this function" + << "\n\tmin_tol: " << min_tol + << "\n\tthis: " << this + ); + min_tolerance = min_tol; + } + + void clear_dictionary () + { + dictionary.clear(); + min_strength = 0; + min_vect_idx = 0; + + K_inv.set_size(0,0); + K.set_size(0,0); + } + + scalar_type projection_error ( + const sample_type& x + ) const + { + const scalar_type kx = kernel(x,x); + if (dictionary.size() == 0) + { + return kx; + } + else + { + // fill in k + k.set_size(dictionary.size()); + for (long r = 0; r < k.nr(); ++r) + k(r) = kernel(x,dictionary[r]); + + // compute the error we would have if we approximated the new x sample + // with the dictionary. That is, do the ALD test from the KRLS paper. + a = K_inv*k; + scalar_type delta = kx - trans(k)*a; + + return delta; + } + } + + bool add ( + const sample_type& x + ) + { + const scalar_type kx = kernel(x,x); + if (dictionary.size() == 0) + { + // just ignore this sample if it is the zero vector (or really close to being zero) + if (std::abs(kx) > std::numeric_limits<scalar_type>::epsilon()) + { + // set initial state since this is the first sample we have seen + K_inv.set_size(1,1); + K_inv(0,0) = 1/kx; + + K.set_size(1,1); + K(0,0) = kx; + + dictionary.push_back(x); + return true; + } + return false; + } + else + { + // fill in k + k.set_size(dictionary.size()); + for (long r = 0; r < k.nr(); ++r) + k(r) = kernel(x,dictionary[r]); + + // compute the error we would have if we approximated the new x sample + // with the dictionary. That is, do the ALD test from the KRLS paper. 
+ a = K_inv*k; + scalar_type delta = kx - trans(k)*a; + + // if this new vector is approximately linearly independent of the vectors + // in our dictionary. + if (delta > min_strength && delta > min_tolerance) + { + if (dictionary.size() == my_max_dictionary_size) + { + // if we have never computed the min_strength then we should compute it + if (min_strength == 0) + recompute_min_strength(); + + const long i = min_vect_idx; + + // replace the min strength vector with x. Put the new vector onto the end of + // dictionary and remove the vector at position i. + dictionary.erase(dictionary.begin()+i); + dictionary.push_back(x); + + // compute reduced K_inv. + // Remove the i'th vector from the inverse kernel matrix. This formula is basically + // just the reverse of the way K_inv is updated by equation 3.14 below. + temp = removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i); + + // recompute these guys since they were computed with the old + // kernel matrix + k2 = remove_row(k,i); + a2 = temp*k2; + delta = kx - trans(k2)*a2; + + // now update temp with the new dictionary vector + // update the middle part of the matrix + set_subm(K_inv, get_rect(temp)) = temp + a2*trans(a2)/delta; + // update the right column of the matrix + set_subm(K_inv, 0, temp.nr(),temp.nr(),1) = -a2/delta; + // update the bottom row of the matrix + set_subm(K_inv, temp.nr(), 0, 1, temp.nr()) = trans(-a2/delta); + // update the bottom right corner of the matrix + K_inv(temp.nr(), temp.nc()) = 1/delta; + + // now update the kernel matrix K + set_subm(K,get_rect(temp)) = removerc(K, i,i); + set_subm(K, 0, K.nr()-1,K.nr()-1,1) = k2; + // update the bottom row of the matrix + set_subm(K, K.nr()-1, 0, 1, K.nr()-1) = trans(k2); + K(K.nr()-1, K.nc()-1) = kx; + + // now we have to recompute the min_strength in this case + recompute_min_strength(); + } + else + { + // update K_inv by computing the new one in the temp matrix (equation 3.14 from Engel) + 
temp.set_size(K_inv.nr()+1, K_inv.nc()+1); + // update the middle part of the matrix + set_subm(temp, get_rect(K_inv)) = K_inv + a*trans(a)/delta; + // update the right column of the matrix + set_subm(temp, 0, K_inv.nr(),K_inv.nr(),1) = -a/delta; + // update the bottom row of the matrix + set_subm(temp, K_inv.nr(), 0, 1, K_inv.nr()) = trans(-a/delta); + // update the bottom right corner of the matrix + temp(K_inv.nr(), K_inv.nc()) = 1/delta; + // put temp into K_inv + temp.swap(K_inv); + + + // update K (the kernel matrix) + temp.set_size(K.nr()+1, K.nc()+1); + set_subm(temp, get_rect(K)) = K; + // update the right column of the matrix + set_subm(temp, 0, K.nr(),K.nr(),1) = k; + // update the bottom row of the matrix + set_subm(temp, K.nr(), 0, 1, K.nr()) = trans(k); + temp(K.nr(), K.nc()) = kx; + // put temp into K + temp.swap(K); + + + // add x to the dictionary + dictionary.push_back(x); + + } + return true; + } + else + { + return false; + } + } + } + + void swap ( + linearly_independent_subset_finder& item + ) + { + exchange(kernel, item.kernel); + dictionary.swap(item.dictionary); + exchange(min_strength, item.min_strength); + exchange(min_vect_idx, item.min_vect_idx); + K_inv.swap(item.K_inv); + K.swap(item.K); + exchange(my_max_dictionary_size, item.my_max_dictionary_size); + exchange(min_tolerance, item.min_tolerance); + + // non-state temp members + a.swap(item.a); + k.swap(item.k); + a2.swap(item.a2); + k2.swap(item.k2); + temp.swap(item.temp); + } + + size_t size ( + ) const { return dictionary.size(); } + + const matrix<sample_type,0,1,mem_manager_type> get_dictionary ( + ) const + { + return mat(dictionary); + } + + friend void serialize(const linearly_independent_subset_finder& item, std::ostream& out) + { + serialize(item.kernel, out); + serialize(item.dictionary, out); + serialize(item.min_strength, out); + serialize(item.min_vect_idx, out); + serialize(item.K_inv, out); + serialize(item.K, out); + serialize(item.my_max_dictionary_size, out); + 
serialize(item.min_tolerance, out); + } + + friend void deserialize(linearly_independent_subset_finder& item, std::istream& in) + { + deserialize(item.kernel, in); + deserialize(item.dictionary, in); + deserialize(item.min_strength, in); + deserialize(item.min_vect_idx, in); + deserialize(item.K_inv, in); + deserialize(item.K, in); + deserialize(item.my_max_dictionary_size, in); + deserialize(item.min_tolerance, in); + } + + const sample_type& operator[] ( + unsigned long index + ) const + { + return dictionary[index]; + } + + const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix ( + ) const + { + return K; + } + + const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix ( + ) const + { + return K_inv; + } + + private: + + typedef std_allocator<sample_type, mem_manager_type> alloc_sample_type; + typedef std_allocator<scalar_type, mem_manager_type> alloc_scalar_type; + typedef std::vector<sample_type,alloc_sample_type> dictionary_vector_type; + typedef std::vector<scalar_type,alloc_scalar_type> scalar_vector_type; + + void recompute_min_strength ( + ) + /*! + ensures + - recomputes the min_strength and min_vect_idx values + so that they are correct with respect to the CONVENTION + !*/ + { + min_strength = std::numeric_limits<scalar_type>::max(); + + // here we loop over each dictionary vector and compute what its delta would be if + // we were to remove it from the dictionary and then try to add it back in. 
+ for (unsigned long i = 0; i < dictionary.size(); ++i) + { + // compute a2 = K_inv*k but where dictionary vector i has been removed + a2 = (removerc(K_inv,i,i) - remove_row(colm(K_inv,i)/K_inv(i,i),i)*remove_col(rowm(K_inv,i),i)) * + (remove_row(colm(K,i),i)); + scalar_type delta = K(i,i) - trans(remove_row(colm(K,i),i))*a2; + + if (delta < min_strength) + { + min_strength = delta; + min_vect_idx = i; + } + } + } + + + kernel_type kernel; + dictionary_vector_type dictionary; + scalar_type min_strength; + unsigned long min_vect_idx; + + matrix<scalar_type,0,0,mem_manager_type> K_inv; + matrix<scalar_type,0,0,mem_manager_type> K; + + unsigned long my_max_dictionary_size; + scalar_type min_tolerance; + + // temp variables here just so we don't have to reconstruct them over and over. Thus, + // they aren't really part of the state of this object. + mutable matrix<scalar_type,0,1,mem_manager_type> a, a2; + mutable matrix<scalar_type,0,1,mem_manager_type> k, k2; + mutable matrix<scalar_type,0,0,mem_manager_type> temp; + + }; + +// ---------------------------------------------------------------------------------------- + + template <typename kernel_type> + void swap(linearly_independent_subset_finder<kernel_type>& a, linearly_independent_subset_finder<kernel_type>& b) + { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + const matrix_op<op_array_to_mat<linearly_independent_subset_finder<T> > > mat ( + const linearly_independent_subset_finder<T>& m + ) + { + typedef op_array_to_mat<linearly_independent_subset_finder<T> > op; + return matrix_op<op>(op(m)); + } + +// ---------------------------------------------------------------------------------------- + namespace impl + { + template < + typename kernel_type, + typename vector_type, + typename rand_type + > + void fill_lisf ( + linearly_independent_subset_finder<kernel_type>& lisf, + const vector_type& samples, + rand_type& rnd, + 
int sampling_size + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(samples) && sampling_size > 0, + "\t void fill_lisf()" + << "\n\t invalid arguments to this function" + << "\n\t is_vector(samples): " << is_vector(samples) + << "\n\t sampling_size: " << sampling_size + ); + + // no need to do anything if there aren't any samples + if (samples.size() == 0) + return; + + typedef typename kernel_type::scalar_type scalar_type; + + // Start out by guessing what a reasonable projection error tolerance is. We will use + // the biggest projection error we see in a small sample. + scalar_type tol = 0; + for (int i = 0; i < sampling_size; ++i) + { + const unsigned long idx = rnd.get_random_32bit_number()%samples.size(); + const scalar_type temp = lisf.projection_error(samples(idx)); + if (temp > tol) + tol = temp; + } + + const scalar_type min_tol = lisf.minimum_tolerance(); + + // run many rounds of random sampling. In each round we drop the tolerance lower. + while (tol >= min_tol && lisf.size() < lisf.max_dictionary_size()) + { + tol *= 0.5; + lisf.set_minimum_tolerance(std::max(tol, min_tol)); + int add_failures = 0; + + // Keep picking random samples and adding them into the lisf. Stop when we either + // fill it up or can't find any more samples with projection error larger than the + // current tolerance. 
+ while (lisf.size() < lisf.max_dictionary_size() && add_failures < sampling_size) + { + if (lisf.add(samples(rnd.get_random_32bit_number()%samples.size())) == false) + { + ++add_failures; + } + } + } + + // set this back to its original value + lisf.set_minimum_tolerance(min_tol); + } + } + + template < + typename kernel_type, + typename vector_type + > + void fill_lisf ( + linearly_independent_subset_finder<kernel_type>& lisf, + const vector_type& samples + ) + { + dlib::rand rnd; + impl::fill_lisf(lisf, mat(samples),rnd, 2000); + } + + template < + typename kernel_type, + typename vector_type, + typename rand_type + > + typename enable_if<is_rand<rand_type> >::type fill_lisf ( + linearly_independent_subset_finder<kernel_type>& lisf, + const vector_type& samples, + rand_type& rnd, + const int sampling_size = 2000 + ) + { + impl::fill_lisf(lisf, mat(samples),rnd, sampling_size); + } + + template < + typename kernel_type, + typename vector_type, + typename rand_type + > + typename disable_if<is_rand<rand_type> >::type fill_lisf ( + linearly_independent_subset_finder<kernel_type>& lisf, + const vector_type& samples, + rand_type random_seed, + const int sampling_size = 2000 + ) + { + dlib::rand rnd; + rnd.set_seed(cast_to_string(random_seed)); + impl::fill_lisf(lisf, mat(samples), rnd, sampling_size); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LISfh_ + diff --git a/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h b/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h new file mode 100644 index 000000000..3224f9a0a --- /dev/null +++ b/ml/dlib/dlib/svm/linearly_independent_subset_finder_abstract.h @@ -0,0 +1,327 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_LISf_ABSTRACT_ +#ifdef DLIB_LISf_ABSTRACT_ + +#include "../algs.h" +#include "../serialize.h" +#include "kernel_abstract.h" + +namespace dlib +{ + + template < + typename kernel_type + > + class linearly_independent_subset_finder + { + /*! + REQUIREMENTS ON kernel_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + INITIAL VALUE + - size() == 0 + + WHAT THIS OBJECT REPRESENTS + This is an implementation of an online algorithm for recursively finding a + set (aka dictionary) of linearly independent vectors in a kernel induced + feature space. To use it you decide how large you would like the dictionary + to be and then you feed it sample points. + + The implementation uses the Approximately Linearly Dependent metric described + in the paper The Kernel Recursive Least Squares Algorithm by Yaakov Engel to + decide which points are more linearly independent than others. The metric is + simply the squared distance between a test point and the subspace spanned by + the set of dictionary vectors. + + Each time you present this object with a new sample point (via this->add()) + it calculates the projection distance and if it is sufficiently large then this + new point is included into the dictionary. Note that this object can be configured + to have a maximum size. Once the max dictionary size is reached each new point + kicks out a previous point. This is done by removing the dictionary vector that + has the smallest projection distance onto the others. That is, the "least linearly + independent" vector is removed to make room for the new one. + !*/ + + public: + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::sample_type type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + linearly_independent_subset_finder ( + ); + /*! 
+ ensures + - #minimum_tolerance() == 0.001 + - this object is properly initialized + - #get_kernel() == kernel_type() (i.e. whatever the default is for the supplied kernel) + - #max_dictionary_size() == 100 + !*/ + + linearly_independent_subset_finder ( + const kernel_type& kernel_, + unsigned long max_dictionary_size_, + scalar_type min_tolerance = 0.001 + ); + /*! + requires + - min_tolerance > 0 + - max_dictionary_size > 1 + ensures + - #minimum_tolerance() == min_tolerance + - this object is properly initialized + - #get_kernel() == kernel_ + - #max_dictionary_size() == max_dictionary_size_ + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a const reference to the kernel used by this object + !*/ + + unsigned long max_dictionary_size( + ) const; + /*! + ensures + - returns the maximum number of dictionary vectors this object + will accumulate. That is, size() will never be + greater than max_dictionary_size(). + !*/ + + scalar_type minimum_tolerance( + ) const; + /*! + ensures + - returns the minimum projection error necessary to include a sample point + into the dictionary. + !*/ + + void set_minimum_tolerance ( + scalar_type min_tolerance + ); + /*! + requires + - min_tolerance > 0 + ensures + - #minimum_tolerance() == min_tolerance + !*/ + + void clear_dictionary ( + ); + /*! + ensures + - clears out all the data (e.g. #size() == 0) + !*/ + + bool add ( + const sample_type& x + ); + /*! + ensures + - if (size() < max_dictionary_size()) then + - if (projection_error(x) > minimum_tolerance()) then + - adds x into the dictionary + - (*this)[#size()-1] == x + - #size() == size() + 1 + - returns true + - else + - the dictionary is not changed + - returns false + - else + - #size() == size() + (i.e. the number of vectors in this object doesn't change) + - since the dictionary is full adding a new element means we have to + remove one of the current ones. 
So let proj_error[i] be equal to the + projection error obtained when projecting dictionary vector (*this)[i] + onto the other elements of the dictionary. Then let min_proj_error + be equal to the minimum value in proj_error. The dictionary element + with the minimum projection error is the "least linearly independent" + vector in the dictionary and is the one which will be removed to make + room for a new element. + - if (projection_error(x) > minimum_tolerance() && projection_error(x) > min_proj_error) + - the least linearly independent vector in this object is removed + - adds x into the dictionary + - (*this)[#size()-1] == x + - returns true + - else + - the dictionary is not changed + - returns false + !*/ + + scalar_type projection_error ( + const sample_type& x + ) const; + /*! + ensures + - returns the squared distance between x and the subspace spanned by + the set of dictionary vectors. (e.g. this is the same number that + gets returned by the empirical_kernel_map::project() function's + projection_error argument when the ekm is loaded with the dictionary + vectors.) + - Note that if the dictionary is empty then the return value is + equal to get_kernel()(x,x). + !*/ + + void swap ( + linearly_independent_subset_finder& item + ); + /*! + ensures + - swaps *this with item + !*/ + + size_t size ( + ) const; + /*! + ensures + - returns the number of vectors in the dictionary. + !*/ + + const sample_type& operator[] ( + unsigned long index + ) const; + /*! + requires + - index < size() + ensures + - returns the index'th element in the set of linearly independent + vectors contained in this object. + !*/ + + const matrix<sample_type,0,1,mem_manager_type> get_dictionary ( + ) const; + /*! + ensures + - returns a column vector that contains all the dictionary + vectors in this object. + !*/ + + const matrix<scalar_type,0,0,mem_manager_type>& get_kernel_matrix ( + ) const; + /*! 
+ ensures + - returns a matrix K such that: + - K.nr() == K.nc() == size() + - K == kernel_matrix(get_kernel(), get_dictionary()) + i.e. K == the kernel matrix for the dictionary vectors + !*/ + + const matrix<scalar_type,0,0,mem_manager_type>& get_inv_kernel_marix ( + ) const; + /*! + ensures + - if (size() != 0) + - returns inv(get_kernel_matrix()) + - else + - returns an empty matrix + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type + > + void swap( + linearly_independent_subset_finder<kernel_type>& a, + linearly_independent_subset_finder<kernel_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename kernel_type + > + void serialize ( + const linearly_independent_subset_finder<kernel_type>& item, + std::ostream& out + ); + /*! + provides serialization support for linearly_independent_subset_finder objects + !*/ + + template < + typename kernel_type + > + void deserialize ( + linearly_independent_subset_finder<kernel_type>& item, + std::istream& in + ); + /*! + provides serialization support for linearly_independent_subset_finder objects + !*/ + + template < + typename T + > + const matrix_exp mat ( + const linearly_independent_subset_finder<T>& m + ); + /*! + ensures + - converts m into a matrix + - returns a matrix R such that: + - is_col_vector(R) == true + - R.size() == m.size() + - for all valid r: + R(r) == m[r] + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename vector_type, + typename rand_type + > + void fill_lisf ( + linearly_independent_subset_finder<kernel_type>& lisf, + const vector_type& samples, + rand_type& rnd, + int sampling_size = 2000 + ); + /*! 
+ requires + - vector_type == a dlib::matrix or something convertible to one via + mat() + - is_vector(mat(samples)) == true + - rand_type == an implementation of rand/rand_kernel_abstract.h or a type + convertible to a string via cast_to_string() + - sampling_size > 0 + ensures + - The purpose of this function is to fill lisf with points from samples. It does + this by randomly sampling elements of samples until no more can be added. The + precise stopping condition is when sampling_size additions to lisf have failed + or the max dictionary size has been reached. + - This function employs a random number generator. If rand_type is a random + number generator then it uses the instance given. Otherwise it uses cast_to_string(rnd) + to seed a new random number generator. + !*/ + + template < + typename kernel_type, + typename vector_type + > + void fill_lisf ( + linearly_independent_subset_finder<kernel_type>& lisf, + const vector_type& samples + ); + /*! + requires + - vector_type == a dlib::matrix or something convertible to one via + mat() + - is_vector(mat(samples)) == true + ensures + - performs fill_lisf(lisf, samples, default_rand_generator, 2000) + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_LISf_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/multiclass_tools.h b/ml/dlib/dlib/svm/multiclass_tools.h new file mode 100644 index 000000000..d97e8aa04 --- /dev/null +++ b/ml/dlib/dlib/svm/multiclass_tools.h @@ -0,0 +1,68 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
#ifndef DLIB_MULTICLASS_TOoLS_Hh_
#define DLIB_MULTICLASS_TOoLS_Hh_

#include "multiclass_tools_abstract.h"

#include <vector>
#include <set>
#include "../unordered_pair.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Returns the distinct values found in labels, sorted in ascending order.
    template <typename label_type>
    std::vector<label_type> select_all_distinct_labels (
        const std::vector<label_type>& labels
    )
    {
        // A std::set both removes duplicates and keeps its elements sorted, so
        // copying labels through it yields the sorted, distinct result directly.
        std::set<label_type> temp;
        temp.insert(labels.begin(), labels.end());
        return std::vector<label_type>(temp.begin(), temp.end());
    }

// ----------------------------------------------------------------------------------------

    // Given a map keyed by unordered label pairs, returns every pair of distinct
    // labels (drawn from the labels appearing in bdfs) that is NOT a key of bdfs.
    // The result is sorted ascending because it is generated from a sorted set.
    template <typename label_type, typename U>
    std::vector<unordered_pair<label_type> > find_missing_pairs (
        const std::map<unordered_pair<label_type>,U>& bdfs
    )
    {
        typedef std::map<unordered_pair<label_type>,U> map_type;

        // find all the labels
        std::set<label_type> temp;
        for (typename map_type::const_iterator i = bdfs.begin(); i != bdfs.end(); ++i)
        {
            temp.insert(i->first.first);
            temp.insert(i->first.second);
        }

        std::vector<unordered_pair<label_type> > missing_pairs;

        // now make sure all label pairs are present
        typename std::set<label_type>::const_iterator i, j;
        for (i = temp.begin(); i != temp.end(); ++i)
        {
            // j starts one past i, so each unordered pair is checked exactly once
            for (j = i, ++j; j != temp.end(); ++j)
            {
                const unordered_pair<label_type> p(*i, *j);

                if (bdfs.count(p) == 0)
                    missing_pairs.push_back(p);
            }
        }

        return missing_pairs;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_MULTICLASS_TOoLS_Hh_

/* (patch residue: header of the next file in this diff)
diff --git a/ml/dlib/dlib/svm/multiclass_tools_abstract.h b/ml/dlib/dlib/svm/multiclass_tools_abstract.h
new file mode 100644
index 000000000..9e7774d3f
--- /dev/null
+++ b/ml/dlib/dlib/svm/multiclass_tools_abstract.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2010 Davis E.
*/
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_MULTICLASS_TOoLS_ABSTRACT_Hh_ +#ifdef DLIB_MULTICLASS_TOoLS_ABSTRACT_Hh_ + +#include <vector> +#include <map> +#include "../unordered_pair.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename label_type> + std::vector<label_type> select_all_distinct_labels ( + const std::vector<label_type>& labels + ); + /*! + ensures + - Determines all distinct values present in labels and stores them + into a sorted vector and returns it. They are sorted in ascending + order. + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename label_type, typename U> + std::vector<unordered_pair<label_type> > find_missing_pairs ( + const std::map<unordered_pair<label_type>,U>& binary_decision_functions + ); + /*! + ensures + - Let L denote the set of all label_type values present in binary_decision_functions. + - This function finds all the label pairs with both elements distinct and in L but + not also in binary_decision_functions. All these missing pairs are stored + in a sorted vector and returned. They are sorted in ascending order. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_MULTICLASS_TOoLS_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/null_df.h b/ml/dlib/dlib/svm/null_df.h new file mode 100644 index 000000000..2cbbf04a7 --- /dev/null +++ b/ml/dlib/dlib/svm/null_df.h @@ -0,0 +1,33 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
#ifndef DLIB_NULL_DECISION_FUnCTION_Hh_
#define DLIB_NULL_DECISION_FUnCTION_Hh_

#include <iostream>

namespace dlib
{

// ----------------------------------------------------------------------------------------

    struct null_df
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
            This is a type used to represent an unused field in the list of template
            arguments of the one_vs_one_decision_function and one_vs_all_decision_function
            templates.  As such, null_df doesn't actually do anything.
        !*/
        // Callable with any argument type; ignores it and always scores 0.
        template <typename T>
        double operator() ( const T&) const { return 0; }
    };

    // null_df carries no state, so serializing/deserializing it is a no-op
    // (nothing is written to or read from the stream).
    inline void serialize(const null_df&, std::ostream&) {}
    inline void deserialize(null_df&, std::istream&) {}

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_NULL_DECISION_FUnCTION_Hh_

/* (patch residue: header of the next file in this diff)
diff --git a/ml/dlib/dlib/svm/null_trainer.h b/ml/dlib/dlib/svm/null_trainer.h
new file mode 100644
index 000000000..015b00c15
--- /dev/null
+++ b/ml/dlib/dlib/svm/null_trainer.h
@@ -0,0 +1,61 @@
+// Copyright (C) 2009 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
*/
#ifndef DLIB_NULL_TRAINERs_H_
#define DLIB_NULL_TRAINERs_H_

#include "null_trainer_abstract.h"
#include "../algs.h"
#include "function_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // A "trainer" that ignores its training data and always returns the decision
    // function it was constructed with.  Useful for feeding an already-learned
    // decision function through code that expects a trainer interface.
    // (See null_trainer_abstract.h for the full contract.)
    template <
        typename dec_funct_type
        >
    class null_trainer_type
    {
    public:
        typedef typename dec_funct_type::kernel_type kernel_type;
        typedef typename dec_funct_type::scalar_type scalar_type;
        typedef typename dec_funct_type::sample_type sample_type;
        typedef typename dec_funct_type::mem_manager_type mem_manager_type;
        typedef dec_funct_type trained_function_type;

        // Default construction: train() will return a default constructed
        // dec_funct_type object.
        null_trainer_type (
        ){}

        null_trainer_type (
            const dec_funct_type& dec_funct_
        ) : dec_funct(dec_funct_) {}

        // Ignores the samples and labels entirely; just hands back the stored
        // decision function.
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const dec_funct_type& train (
            const in_sample_vector_type& ,
            const in_scalar_vector_type&
        ) const { return dec_funct; }

    private:
        dec_funct_type dec_funct;
    };

// ----------------------------------------------------------------------------------------

    // Convenience factory so callers don't have to spell out the template argument.
    template <
        typename dec_funct_type
        >
    const null_trainer_type<dec_funct_type> null_trainer (
        const dec_funct_type& dec_funct
    ) { return null_trainer_type<dec_funct_type>(dec_funct); }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_NULL_TRAINERs_H_

/* (patch residue: header of the next file in this diff)
diff --git a/ml/dlib/dlib/svm/null_trainer_abstract.h b/ml/dlib/dlib/svm/null_trainer_abstract.h
new file mode 100644
index 000000000..25f6a5443
--- /dev/null
+++ b/ml/dlib/dlib/svm/null_trainer_abstract.h
@@ -0,0 +1,101 @@
+// Copyright (C) 2009 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
*/
+#undef DLIB_NULL_TRAINERs_ABSTRACT_ +#ifdef DLIB_NULL_TRAINERs_ABSTRACT_ + +#include "../algs.h" +#include "function_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename dec_funct_type + > + class null_trainer_type + { + /*! + REQUIREMENTS ON dec_funct_type + dec_funct_type can be any copyable type that provides the needed + typedefs used below (e.g. kernel_type, scalar_type, etc.). + + WHAT THIS OBJECT REPRESENTS + This object is a simple tool for turning a decision function + into a trainer object that always returns the original decision + function when you try to train with it. + + dlib contains a few "training post processing" algorithms (e.g. + reduced() and reduced2()). These tools take in a trainer object, + tell it to perform training, and then they take the output decision + function and do some kind of post processing to it. The null_trainer_type + object is useful because you can use it to run an already + learned decision function through the training post processing + algorithms by turning a decision function into a null_trainer_type + and then giving it to a post processor. + !*/ + + public: + typedef typename dec_funct_type::kernel_type kernel_type; + typedef typename dec_funct_type::scalar_type scalar_type; + typedef typename dec_funct_type::sample_type sample_type; + typedef typename dec_funct_type::mem_manager_type mem_manager_type; + typedef dec_funct_type trained_function_type; + + null_trainer_type ( + ); + /*! + ensures + - any call to this->train(x,y) will return a default initialized + dec_funct_type object. + !*/ + + null_trainer_type ( + const dec_funct_type& dec_funct + ); + /*! + ensures + - any call to this->train(x,y) will always return a copy of + the given dec_funct object. 
+ !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const dec_funct_type& train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + ensures + - returns a copy of the decision function object given to + this object's constructor. + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename dec_funct_type + > + const null_trainer_type<dec_funct_type> null_trainer ( + const dec_funct_type& dec_funct + ) { return null_trainer_type<dec_funct_type>(dec_funct); } + /*! + ensures + - returns a null_trainer_type object that has been instantiated with + the given arguments. That is, this function returns a null_trainer_type + trainer that will return a copy of the given dec_funct object every time + someone calls its train() function. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_NULL_TRAINERs_ABSTRACT_ + + diff --git a/ml/dlib/dlib/svm/num_nonnegative_weights.h b/ml/dlib/dlib/svm/num_nonnegative_weights.h new file mode 100644 index 000000000..4f21f9b69 --- /dev/null +++ b/ml/dlib/dlib/svm/num_nonnegative_weights.h @@ -0,0 +1,76 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
#ifndef DLIB_NUM_NONNEGATIVE_WEIGHtS_Hh_
#define DLIB_NUM_NONNEGATIVE_WEIGHtS_Hh_

#include "../enable_if.h"

namespace dlib
{

    namespace impl2
    {
        // Compile-time member detection via the classic sizeof/SFINAE trick
        // (pre-C++11, so no decltype/std::void_t here).  hnnf_helper<T,f> is only
        // well-formed when T has a member function with the exact signature
        // `unsigned long T::num_nonnegative_weights() const`.
        template <
            typename T,
            unsigned long (T::*funct)()const
            >
        struct hnnf_helper
        {
            typedef char type;
        };

        // Overload selected (via SFINAE on the default argument's type) only when
        // T has the member; returns char, so sizeof(...) == 1 in that case.
        template <typename T>
        char has_num_nonnegative_weights_helper( typename hnnf_helper<T,&T::num_nonnegative_weights>::type = 0 ) { return 0;}

        struct two_bytes
        {
            char a[2];
        };

        // Fallback overload; sizeof(two_bytes) == 2 distinguishes it from the
        // char-returning overload above.
        template <typename T>
        two_bytes has_num_nonnegative_weights_helper(int) { return two_bytes();}

        // The sizeof() expression is evaluated through this intermediate struct to
        // sidestep a Visual Studio compiler bug (hence the name).  The 'a' argument
        // is a char: it converts exactly to the char overload's parameter when that
        // overload is viable, and promotes to int for the fallback otherwise.
        template <typename T>
        struct work_around_visual_studio_bug
        {
            const static unsigned long U = sizeof(has_num_nonnegative_weights_helper<T>('a'));
        };


        // This is a template to tell you if a feature_extractor has a num_nonnegative_weights function or not.
        template <typename T, unsigned long U = work_around_visual_studio_bug<T>::U >
        struct has_num_nonnegative_weights
        {
            static const bool value = false;
        };

        // Specialization chosen when sizeof(...) == 1, i.e. the member exists.
        template <typename T>
        struct has_num_nonnegative_weights <T,1>
        {
            static const bool value = true;
        };


    }

    // call fe.num_nonnegative_weights() if it exists, otherwise return 0.
    template <typename feature_extractor>
    typename enable_if<impl2::has_num_nonnegative_weights<feature_extractor>,unsigned long>::type num_nonnegative_weights (
        const feature_extractor& fe
    )
    {
        return fe.num_nonnegative_weights();
    }

    template <typename feature_extractor>
    typename disable_if<impl2::has_num_nonnegative_weights<feature_extractor>,unsigned long>::type num_nonnegative_weights (
        const feature_extractor& /*fe*/
    )
    {
        return 0;
    }

}

#endif // DLIB_NUM_NONNEGATIVE_WEIGHtS_Hh_

/* (patch residue: header of the next file in this diff)
diff --git a/ml/dlib/dlib/svm/one_vs_all_decision_function.h b/ml/dlib/dlib/svm/one_vs_all_decision_function.h
new file mode 100644
index 000000000..8afa52344
--- /dev/null
+++ b/ml/dlib/dlib/svm/one_vs_all_decision_function.h
@@ -0,0 +1,265 @@
+// Copyright (C) 2010 Davis E.
*/
/* (patch residue: tail of the previous chunk's split copyright line)
King (davis@dlib.net)
*/
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_ONE_VS_ALL_DECISION_FUnCTION_Hh_
#define DLIB_ONE_VS_ALL_DECISION_FUnCTION_Hh_

#include "one_vs_all_decision_function_abstract.h"

#include "../serialize.h"
#include "../type_safe_union.h"
#include <sstream>
#include <map>
#include "../any.h"
#include "null_df.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Multiclass classifier built from one binary classifier per label (one vs. all).
    // See one_vs_all_decision_function_abstract.h for the full documented contract.
    template <
        typename one_vs_all_trainer,
        typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df,
        typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df,
        typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df,
        typename DF10 = null_df
        >
    class one_vs_all_decision_function
    {
    public:

        typedef typename one_vs_all_trainer::label_type result_type;
        typedef typename one_vs_all_trainer::sample_type sample_type;
        typedef typename one_vs_all_trainer::scalar_type scalar_type;
        typedef typename one_vs_all_trainer::mem_manager_type mem_manager_type;

        // label -> type-erased binary decision function for that label vs. the rest
        typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table;

        one_vs_all_decision_function() :num_classes(0) {}

        explicit one_vs_all_decision_function(
            const binary_function_table& dfs_
        ) : dfs(dfs_)
        {
            num_classes = dfs.size();
        }

        const binary_function_table& get_binary_decision_functions (
        ) const
        {
            return dfs;
        }

        // Returns all labels this object can predict (the keys of dfs, so they
        // come out in the map's sorted order).
        const std::vector<result_type> get_labels (
        ) const
        {
            std::vector<result_type> temp;
            temp.reserve(dfs.size());
            for (typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i)
            {
                temp.push_back(i->first);
            }
            return temp;
        }


        // Converting copy constructor: allows copying from a one_vs_all_decision_function
        // instantiated with different DF* template arguments (the table entries are
        // type-erased, so only the trainer type must match).
        template <
            typename df1, typename df2, typename df3, typename df4, typename df5,
            typename df6, typename df7, typename df8, typename df9, typename df10
            >
        one_vs_all_decision_function (
            const one_vs_all_decision_function<one_vs_all_trainer,
                                               df1, df2, df3, df4, df5,
                                               df6, df7, df8, df9, df10>& item
        ) : dfs(item.get_binary_decision_functions()), num_classes(item.number_of_classes()) {}

        unsigned long number_of_classes (
        ) const
        {
            return num_classes;
        }

        // Returns (predicted label, score).  The winner is the binary classifier
        // with the largest output.
        std::pair<result_type, scalar_type> predict (
            const sample_type& sample
        ) const
        {
            DLIB_ASSERT(number_of_classes() != 0,
                "\t pair<result_type,scalar_type> one_vs_all_decision_function::predict()"
                << "\n\t You can't make predictions with an empty decision function."
                << "\n\t this: " << this
                );

            result_type best_label = result_type();
            scalar_type best_score = -std::numeric_limits<scalar_type>::infinity();

            // run all the classifiers over the sample and find the best one
            for(typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i)
            {
                const scalar_type score = i->second(sample);

                if (score > best_score)
                {
                    best_score = score;
                    best_label = i->first;
                }
            }

            return std::make_pair(best_label, best_score);
        }

        result_type operator() (
            const sample_type& sample
        ) const
        {
            DLIB_ASSERT(number_of_classes() != 0,
                "\t result_type one_vs_all_decision_function::operator()"
                << "\n\t You can't make predictions with an empty decision function."
                << "\n\t this: " << this
                );

            return predict(sample).first;
        }



    private:
        binary_function_table dfs;
        unsigned long num_classes;

    };

// ----------------------------------------------------------------------------------------

    // Serialization: each table entry's type-erased function is matched against the
    // DF1..DF10 list and written through a type_safe_union so it can be restored
    // with its concrete type.  A function whose type isn't listed is an error.
    template <
        typename T,
        typename DF1, typename DF2, typename DF3,
        typename DF4, typename DF5, typename DF6,
        typename DF7, typename DF8, typename DF9,
        typename DF10
        >
    void serialize(
        const one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item,
        std::ostream& out
    )
    {
        try
        {
            type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp;
            typedef typename T::label_type result_type;
            typedef typename T::sample_type sample_type;
            typedef typename T::scalar_type scalar_type;
            typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table;

            // format version written first so deserialize() can reject mismatches
            const unsigned long version = 1;
            serialize(version, out);

            const unsigned long size = item.get_binary_decision_functions().size();
            serialize(size, out);

            for(typename binary_function_table::const_iterator i = item.get_binary_decision_functions().begin();
                i != item.get_binary_decision_functions().end(); ++i)
            {
                serialize(i->first, out);

                if      (i->second.template contains<DF1>()) temp.template get<DF1>() = any_cast<DF1>(i->second);
                else if (i->second.template contains<DF2>()) temp.template get<DF2>() = any_cast<DF2>(i->second);
                else if (i->second.template contains<DF3>()) temp.template get<DF3>() = any_cast<DF3>(i->second);
                else if (i->second.template contains<DF4>()) temp.template get<DF4>() = any_cast<DF4>(i->second);
                else if (i->second.template contains<DF5>()) temp.template get<DF5>() = any_cast<DF5>(i->second);
                else if (i->second.template contains<DF6>()) temp.template get<DF6>() = any_cast<DF6>(i->second);
                else if (i->second.template contains<DF7>()) temp.template get<DF7>() = any_cast<DF7>(i->second);
                else if (i->second.template contains<DF8>()) temp.template get<DF8>() = any_cast<DF8>(i->second);
                else if (i->second.template contains<DF9>()) temp.template get<DF9>() = any_cast<DF9>(i->second);
                else if (i->second.template contains<DF10>()) temp.template get<DF10>() = any_cast<DF10>(i->second);
                else throw serialization_error("Can't serialize one_vs_all_decision_function. Not all decision functions defined.");

                serialize(temp,out);
            }
        }
        catch (serialization_error& e)
        {
            throw serialization_error(e.info + "\n while serializing an object of type one_vs_all_decision_function");
        }

    }

// ----------------------------------------------------------------------------------------

    namespace impl_ova
    {
        // Visitor used with type_safe_union::apply_to_contents() to copy whichever
        // concrete decision function the union holds into a type-erased target.
        template <typename sample_type, typename scalar_type>
        struct copy_to_df_helper
        {
            copy_to_df_helper(any_decision_function<sample_type, scalar_type>& target_) : target(target_) {}

            any_decision_function<sample_type, scalar_type>& target;

            template <typename T>
            void operator() (
                const T& item
            ) const
            {
                target = item;
            }
        };
    }

    template <
        typename T,
        typename DF1, typename DF2, typename DF3,
        typename DF4, typename DF5, typename DF6,
        typename DF7, typename DF8, typename DF9,
        typename DF10
        >
    void deserialize(
        one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item,
        std::istream& in
    )
    {
        try
        {
            type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp;
            typedef typename T::label_type result_type;
            typedef typename T::sample_type sample_type;
            typedef typename T::scalar_type scalar_type;
            typedef impl_ova::copy_to_df_helper<sample_type, scalar_type> copy_to;

            unsigned long version;
            deserialize(version, in);

            if (version != 1)
                throw serialization_error("Can't deserialize one_vs_all_decision_function. Wrong version.");

            unsigned long size;
            deserialize(size, in);

            typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table;
            binary_function_table dfs;

            result_type l;
            for (unsigned long i = 0; i < size; ++i)
            {
                deserialize(l, in);
                deserialize(temp, in);
                // null_df is the placeholder for unused DF* slots, so finding one in
                // the stream means the writer used a decision function type that this
                // instantiation doesn't list.
                if (temp.template contains<null_df>())
                    throw serialization_error("A sub decision function of unknown type was encountered.");

                temp.apply_to_contents(copy_to(dfs[l]));
            }

            item = one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>(dfs);
        }
        catch (serialization_error& e)
        {
            throw serialization_error(e.info + "\n while deserializing an object of type one_vs_all_decision_function");
        }
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_ONE_VS_ALL_DECISION_FUnCTION_Hh_

/* (patch residue: header of the next file in this diff)
diff --git a/ml/dlib/dlib/svm/one_vs_all_decision_function_abstract.h b/ml/dlib/dlib/svm/one_vs_all_decision_function_abstract.h
new file mode 100644
index 000000000..8daacb8d6
--- /dev/null
+++ b/ml/dlib/dlib/svm/one_vs_all_decision_function_abstract.h
@@ -0,0 +1,214 @@
+// Copyright (C) 2010 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
*/
+#undef DLIB_ONE_VS_ALL_DECISION_FUnCTION_ABSTRACT_Hh_ +#ifdef DLIB_ONE_VS_ALL_DECISION_FUnCTION_ABSTRACT_Hh_ + + +#include "../serialize.h" +#include <map> +#include "../any/any_decision_function_abstract.h" +#include "one_vs_all_trainer_abstract.h" +#include "null_df.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename one_vs_all_trainer, + typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, + typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, + typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, + typename DF10 = null_df + > + class one_vs_all_decision_function + { + /*! + REQUIREMENTS ON one_vs_all_trainer + This should be an instantiation of the one_vs_all_trainer template. + It is used to infer which types are used for various things, such as + representing labels. + + REQUIREMENTS ON DF* + These types can either be left at their default values or set + to any kind of decision function object capable of being + stored in an any_decision_function<sample_type,scalar_type> + object. These types should also be serializable. + + WHAT THIS OBJECT REPRESENTS + This object represents a multiclass classifier built out of a set of + binary classifiers. Each binary classifier is used to vote for the + correct multiclass label using a one vs. all strategy. Therefore, + if you have N classes then there will be N binary classifiers inside + this object. + + Note that the DF* template arguments are only used if you want + to serialize and deserialize one_vs_all_decision_function objects. + Specifically, all the types of binary decision function contained + within a one_vs_all_decision_function must be listed in the + template arguments if serialization and deserialization is to + be used. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. 
However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const members of this object from multiple + threads so long as all the decision functions contained in this object + are also threadsafe. This is because the const members are purely + read-only operations. However, any operation that modifies a + one_vs_all_decision_function is not threadsafe. + !*/ + public: + + typedef typename one_vs_all_trainer::label_type result_type; + typedef typename one_vs_all_trainer::sample_type sample_type; + typedef typename one_vs_all_trainer::scalar_type scalar_type; + typedef typename one_vs_all_trainer::mem_manager_type mem_manager_type; + + typedef std::map<result_type, any_decision_function<sample_type, scalar_type> > binary_function_table; + + one_vs_all_decision_function( + ); + /*! + ensures + - #number_of_classes() == 0 + - #get_binary_decision_functions().size() == 0 + - #get_labels().size() == 0 + !*/ + + explicit one_vs_all_decision_function( + const binary_function_table& decision_functions + ); + /*! + ensures + - #get_binary_decision_functions() == decision_functions + - #get_labels() == a list of all the labels which appear in the + given set of decision functions + - #number_of_classes() == #get_labels().size() + !*/ + + template < + typename df1, typename df2, typename df3, typename df4, typename df5, + typename df6, typename df7, typename df8, typename df9, typename df10 + > + one_vs_all_decision_function ( + const one_vs_all_decision_function<one_vs_all_trainer, + df1, df2, df3, df4, df5, + df6, df7, df8, df9, df10>& item + ); + /*! + ensures + - #*this will be a copy of item + - #number_of_classes() == item.number_of_classes() + - #get_labels() == item.get_labels() + - #get_binary_decision_functions() == item.get_binary_decision_functions() + !*/ + + const binary_function_table& get_binary_decision_functions ( + ) const; + /*! 
            ensures
                - returns the table of binary decision functions used by this
                  object.  The label given to a test sample is computed by
                  determining which binary decision function has the largest
                  (i.e. most positive) output and returning the label associated
                  with that decision function.
        !*/

        const std::vector<result_type> get_labels (
        ) const;
        /*!
            ensures
                - returns a vector containing all the labels which can be
                  predicted by this object.
        !*/

        unsigned long number_of_classes (
        ) const;
        /*!
            ensures
                - returns get_labels().size()
                  (i.e. returns the number of different labels/classes predicted by
                  this object)
        !*/

        std::pair<result_type, scalar_type> predict (
            const sample_type& sample
        ) const;
        /*!
            requires
                - number_of_classes() != 0
            ensures
                - Evaluates all the decision functions in get_binary_decision_functions()
                  and returns the predicted label and score for the input sample.  That is,
                  returns std::make_pair(label,score)
                - The label is determined by whichever classifier outputs the largest
                  score.
        !*/

        result_type operator() (
            const sample_type& sample
        ) const;
        /*!
            requires
                - number_of_classes() != 0
            ensures
                - Evaluates all the decision functions in get_binary_decision_functions()
                  and returns the predicted label.  That is, returns predict(sample).first.
        !*/
    };

// ----------------------------------------------------------------------------------------

    template <
        typename T,
        typename DF1, typename DF2, typename DF3,
        typename DF4, typename DF5, typename DF6,
        typename DF7, typename DF8, typename DF9,
        typename DF10
        >
    void serialize(
        const one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item,
        std::ostream& out
    );
    /*!
        ensures
            - writes the given item to the output stream out.
        throws
            - serialization_error.
+ This is thrown if there is a problem writing to the ostream or if item + contains a type of decision function not listed among the DF* template + arguments. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename DF1, typename DF2, typename DF3, + typename DF4, typename DF5, typename DF6, + typename DF7, typename DF8, typename DF9, + typename DF10 + > + void deserialize( + one_vs_all_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, + std::istream& in + ); + /*! + ensures + - deserializes a one_vs_all_decision_function from in and stores it in item. + throws + - serialization_error. + This is thrown if there is a problem reading from the istream or if the + serialized data contains decision functions not listed among the DF* + template arguments. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ONE_VS_ALL_DECISION_FUnCTION_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/one_vs_all_trainer.h b/ml/dlib/dlib/svm/one_vs_all_trainer.h new file mode 100644 index 000000000..bcb006a41 --- /dev/null +++ b/ml/dlib/dlib/svm/one_vs_all_trainer.h @@ -0,0 +1,234 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
#ifndef DLIB_ONE_VS_ALL_TRAiNER_Hh_
#define DLIB_ONE_VS_ALL_TRAiNER_Hh_

#include "one_vs_all_trainer_abstract.h"

#include "one_vs_all_decision_function.h"
#include <vector>

#include "multiclass_tools.h"

#include <sstream>
#include <iostream>

#include "../any.h"
#include <map>
#include <set>
#include "../threads.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Trains one binary classifier per distinct label (one vs. all) and bundles the
    // results into a one_vs_all_decision_function.  The per-label training jobs run
    // in parallel via dlib::parallel_for.  See one_vs_all_trainer_abstract.h for the
    // full documented contract.
    template <
        typename any_trainer,
        typename label_type_ = double
        >
    class one_vs_all_trainer
    {
    public:
        typedef label_type_ label_type;

        typedef typename any_trainer::sample_type sample_type;
        typedef typename any_trainer::scalar_type scalar_type;
        typedef typename any_trainer::mem_manager_type mem_manager_type;

        typedef one_vs_all_decision_function<one_vs_all_trainer> trained_function_type;

        one_vs_all_trainer (
        ) :
            verbose(false),
            num_threads(4)
        {}

        // Sets the trainer used for every label, discarding any per-label trainers
        // previously registered with the two-argument overload.
        void set_trainer (
            const any_trainer& trainer
        )
        {
            default_trainer = trainer;
            trainers.clear();
        }

        // Sets the trainer used specifically for the "l vs. all" subproblem.
        void set_trainer (
            const any_trainer& trainer,
            const label_type& l
        )
        {
            trainers[l] = trainer;
        }

        void be_verbose (
        )
        {
            verbose = true;
        }

        void be_quiet (
        )
        {
            verbose = false;
        }

        void set_num_threads (
            unsigned long num
        )
        {
            num_threads = num;
        }

        unsigned long get_num_threads (
        ) const
        {
            return num_threads;
        }

        // Thrown by train() when a label has no registered trainer and no default
        // trainer is set; carries the offending label in l.
        struct invalid_label : public dlib::error
        {
            invalid_label(const std::string& msg, const label_type& l_
                ) : dlib::error(msg), l(l_) {};

            virtual ~invalid_label(
            ) throw() {}

            label_type l;
        };

        trained_function_type train (
            const std::vector<sample_type>& all_samples,
            const std::vector<label_type>& all_labels
        ) const
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(is_learning_problem(all_samples,all_labels),
                "\t trained_function_type one_vs_all_trainer::train(all_samples,all_labels)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t all_samples.size(): " << all_samples.size()
                << "\n\t all_labels.size(): " << all_labels.size()
                );

            const std::vector<label_type> distinct_labels = select_all_distinct_labels(all_labels);

            // make sure we have a trainer object for each of the label types.
            for (unsigned long i = 0; i < distinct_labels.size(); ++i)
            {
                const label_type l = distinct_labels[i];
                const typename binary_function_table::const_iterator itr = trainers.find(l);

                if (itr == trainers.end() && default_trainer.is_empty())
                {
                    std::ostringstream sout;
                    sout << "In one_vs_all_trainer, no trainer registered for the " << l << " label.";
                    throw invalid_label(sout.str(), l);
                }
            }


            // now do the training
            parallel_for_helper helper(all_samples,all_labels,default_trainer,trainers,verbose,distinct_labels);
            // 500 is the parallel_for chunk size hint
            parallel_for(num_threads, 0, distinct_labels.size(), helper, 500);

            // Worker exceptions are captured into helper.error_message rather than
            // propagated across threads; re-throw them here on the caller's thread.
            if (helper.error_message.size() != 0)
            {
                throw dlib::error("binary trainer threw while training one vs. all classifier. Error was: " + helper.error_message);
            }
            return trained_function_type(helper.dfs);
        }

    private:

        typedef std::map<label_type, any_trainer> binary_function_table;

        // Function object run by parallel_for: element i trains the binary
        // classifier for distinct_labels[i] vs. everything else.  Shared mutable
        // state (dfs, error_message, std::cout use) is guarded by class_mutex.
        struct parallel_for_helper
        {
            parallel_for_helper(
                const std::vector<sample_type>& all_samples_,
                const std::vector<label_type>& all_labels_,
                const any_trainer& default_trainer_,
                const binary_function_table& trainers_,
                const bool verbose_,
                const std::vector<label_type>& distinct_labels_
            ) :
                all_samples(all_samples_),
                all_labels(all_labels_),
                default_trainer(default_trainer_),
                trainers(trainers_),
                verbose(verbose_),
                distinct_labels(distinct_labels_)
            {}

            void operator()(long i) const
            {
                try
                {
                    std::vector<scalar_type> labels;

                    const label_type l = distinct_labels[i];

                    // setup one of the one vs all training sets
                    for (unsigned long k = 0; k < all_samples.size(); ++k)
                    {
                        if (all_labels[k] == l)
                            labels.push_back(+1);
                        else
                            labels.push_back(-1);
                    }


                    if (verbose)
                    {
                        auto_mutex lock(class_mutex);
                        std::cout << "Training classifier for " << l << " vs. all" << std::endl;
                    }

                    any_trainer trainer;
                    // now train a binary classifier using the samples we selected
                    { auto_mutex lock(class_mutex);
                        const typename binary_function_table::const_iterator itr = trainers.find(l);
                        if (itr != trainers.end())
                            trainer = itr->second;
                        else
                            trainer = default_trainer;
                    }

                    any_decision_function<sample_type,scalar_type> binary_df = trainer.train(all_samples, labels);

                    auto_mutex lock(class_mutex);
                    dfs[l] = binary_df;
                }
                catch (std::exception& e)
                {
                    // record the failure for the coordinating thread; if several
                    // workers throw, the last writer wins.
                    auto_mutex lock(class_mutex);
                    error_message = e.what();
                }
            }

            mutable typename trained_function_type::binary_function_table dfs;
            mutex class_mutex;
            mutable std::string error_message;

            const std::vector<sample_type>& all_samples;
            const std::vector<label_type>& all_labels;
            const any_trainer& default_trainer;
            const binary_function_table& trainers;
            const bool verbose;
            const std::vector<label_type>& distinct_labels;
        };

        any_trainer default_trainer;

        binary_function_table trainers;

        bool verbose;
        unsigned long num_threads;

    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_ONE_VS_ALL_TRAiNER_Hh_

/* (patch residue: header of the next file in this diff)
diff --git a/ml/dlib/dlib/svm/one_vs_all_trainer_abstract.h b/ml/dlib/dlib/svm/one_vs_all_trainer_abstract.h
new file mode 100644
index 000000000..fb719a7e4
--- /dev/null
+++ b/ml/dlib/dlib/svm/one_vs_all_trainer_abstract.h
@@ -0,0 +1,163 @@
+// Copyright (C) 2010 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
*/
+#undef DLIB_ONE_VS_ALL_TRAiNER_ABSTRACT_Hh_
+#ifdef DLIB_ONE_VS_ALL_TRAiNER_ABSTRACT_Hh_
+
+
+#include "one_vs_all_decision_function_abstract.h"
+#include <vector>
+
+#include "../any/any_trainer_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename any_trainer,
+        typename label_type_ = double
+        >
+    class one_vs_all_trainer
+    {
+        /*!
+            REQUIREMENTS ON any_trainer
+                must be an instantiation of the dlib::any_trainer template.
+
+            REQUIREMENTS ON label_type_
+                label_type_ must be default constructible, copyable, and comparable using
+                operator < and ==. It must also be possible to write it to an std::ostream
+                using operator<<.
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for turning a bunch of binary classifiers into a
+                multiclass classifier. It does this by training the binary classifiers
+                in a one vs. all fashion. That is, if you have N possible classes then
+                it trains N binary classifiers which are then used to vote on the identity
+                of a test sample.
+
+                This object works with any kind of binary classification trainer object
+                capable of being assigned to an any_trainer object. (e.g. the svm_nu_trainer)
+        !*/
+
+    public:
+
+
+        typedef label_type_ label_type;
+
+        typedef typename any_trainer::sample_type sample_type;
+        typedef typename any_trainer::scalar_type scalar_type;
+        typedef typename any_trainer::mem_manager_type mem_manager_type;
+
+        typedef one_vs_all_decision_function<one_vs_all_trainer> trained_function_type;
+
+        one_vs_all_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized.
+                - This object will not be verbose unless be_verbose() is called.
+                - No binary trainers are associated with *this. I.e. you have to
+                  call set_trainer() before calling train().
+                - #get_num_threads() == 4
+        !*/
+
+        void set_trainer (
+            const any_trainer& trainer
+        );
+        /*!
+            ensures
+                - sets the trainer used for all binary subproblems.
Any previous + calls to set_trainer() are overridden by this function. Even the + more specific set_trainer(trainer, l) form. + !*/ + + void set_trainer ( + const any_trainer& trainer, + const label_type& l + ); + /*! + ensures + - Sets the trainer object used to create a binary classifier to + distinguish l labeled samples from all other samples. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + struct invalid_label : public dlib::error + { + /*! + This is the exception thrown by the train() function below. + !*/ + label_type l; + }; + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels + ) const; + /*! + requires + - is_learning_problem(all_samples, all_labels) + ensures + - trains a bunch of binary classifiers in a one vs all fashion to solve the given + multiclass classification problem. + - returns a one_vs_all_decision_function F with the following properties: + - F contains all the learned binary classifiers and can be used to predict + the labels of new samples. + - if (new_x is a sample predicted to have a label of L) then + - F(new_x) == L + - F.get_labels() == select_all_distinct_labels(all_labels) + - F.number_of_classes() == select_all_distinct_labels(all_labels).size() + throws + - invalid_label + This exception is thrown if there are labels in all_labels which don't have + any corresponding trainer object. 
This will never happen if set_trainer(trainer) + has been called. However, if only the set_trainer(trainer,l) form has been + used then this exception is thrown if not all labels have been given a trainer. + + invalid_label::l will contain the label which is missing a trainer object. + Additionally, the exception will contain an informative error message available + via invalid_label::what(). + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ONE_VS_ALL_TRAiNER_ABSTRACT_Hh_ + + + diff --git a/ml/dlib/dlib/svm/one_vs_one_decision_function.h b/ml/dlib/dlib/svm/one_vs_one_decision_function.h new file mode 100644 index 000000000..02a5fa51e --- /dev/null +++ b/ml/dlib/dlib/svm/one_vs_one_decision_function.h @@ -0,0 +1,291 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_ONE_VS_ONE_DECISION_FUnCTION_Hh_ +#define DLIB_ONE_VS_ONE_DECISION_FUnCTION_Hh_ + +#include "one_vs_one_decision_function_abstract.h" + +#include "../serialize.h" +#include "../type_safe_union.h" +#include <iostream> +#include <sstream> +#include <set> +#include <map> +#include "../any.h" +#include "../unordered_pair.h" +#include "null_df.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename one_vs_one_trainer, + typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, + typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, + typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, + typename DF10 = null_df + > + class one_vs_one_decision_function + { + public: + + typedef typename one_vs_one_trainer::label_type result_type; + typedef typename one_vs_one_trainer::sample_type sample_type; + typedef typename one_vs_one_trainer::scalar_type scalar_type; + typedef typename 
one_vs_one_trainer::mem_manager_type mem_manager_type; + + typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; + + one_vs_one_decision_function() :num_classes(0) {} + + explicit one_vs_one_decision_function( + const binary_function_table& dfs_ + ) : dfs(dfs_) + { +#ifdef ENABLE_ASSERTS + { + const std::vector<unordered_pair<result_type> > missing_pairs = find_missing_pairs(dfs_); + if (missing_pairs.size() != 0) + { + std::ostringstream sout; + for (unsigned long i = 0; i < missing_pairs.size(); ++i) + { + sout << "\t (" << missing_pairs[i].first << ", " << missing_pairs[i].second << ")\n"; + } + DLIB_ASSERT(missing_pairs.size() == 0, + "\t void one_vs_one_decision_function::one_vs_one_decision_function()" + << "\n\t The supplied set of binary decision functions is incomplete." + << "\n\t this: " << this + << "\n\t Classifiers are missing for the following label pairs: \n" << sout.str() + ); + } + } +#endif + + // figure out how many labels are covered by this set of binary decision functions + std::set<result_type> labels; + for (typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) + { + labels.insert(i->first.first); + labels.insert(i->first.second); + } + num_classes = labels.size(); + } + + const binary_function_table& get_binary_decision_functions ( + ) const + { + return dfs; + } + + const std::vector<result_type> get_labels ( + ) const + { + std::set<result_type> labels; + for (typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) + { + labels.insert(i->first.first); + labels.insert(i->first.second); + } + return std::vector<result_type>(labels.begin(), labels.end()); + } + + + template < + typename df1, typename df2, typename df3, typename df4, typename df5, + typename df6, typename df7, typename df8, typename df9, typename df10 + > + one_vs_one_decision_function ( + const one_vs_one_decision_function<one_vs_one_trainer, + df1, 
df2, df3, df4, df5, + df6, df7, df8, df9, df10>& item + ) : dfs(item.get_binary_decision_functions()), num_classes(item.number_of_classes()) {} + + unsigned long number_of_classes ( + ) const + { + return num_classes; + } + + result_type operator() ( + const sample_type& sample + ) const + { + DLIB_ASSERT(number_of_classes() != 0, + "\t void one_vs_one_decision_function::operator()" + << "\n\t You can't make predictions with an empty decision function." + << "\n\t this: " << this + ); + + std::map<result_type,int> votes; + + // run all the classifiers over the sample + for(typename binary_function_table::const_iterator i = dfs.begin(); i != dfs.end(); ++i) + { + const scalar_type score = i->second(sample); + + if (score > 0) + votes[i->first.first] += 1; + else + votes[i->first.second] += 1; + } + + // now figure out who had the most votes + result_type best_label = result_type(); + int best_votes = 0; + for (typename std::map<result_type,int>::iterator i = votes.begin(); i != votes.end(); ++i) + { + if (i->second > best_votes) + { + best_votes = i->second; + best_label = i->first; + } + } + + return best_label; + } + + + + private: + binary_function_table dfs; + unsigned long num_classes; + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename DF1, typename DF2, typename DF3, + typename DF4, typename DF5, typename DF6, + typename DF7, typename DF8, typename DF9, + typename DF10 + > + void serialize( + const one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, + std::ostream& out + ) + { + try + { + type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp; + typedef typename T::label_type result_type; + typedef typename T::sample_type sample_type; + typedef typename T::scalar_type scalar_type; + typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; + + const unsigned long version = 1; 
+ serialize(version, out); + + const unsigned long size = item.get_binary_decision_functions().size(); + serialize(size, out); + + for(typename binary_function_table::const_iterator i = item.get_binary_decision_functions().begin(); + i != item.get_binary_decision_functions().end(); ++i) + { + serialize(i->first, out); + + if (i->second.template contains<DF1>()) temp.template get<DF1>() = any_cast<DF1>(i->second); + else if (i->second.template contains<DF2>()) temp.template get<DF2>() = any_cast<DF2>(i->second); + else if (i->second.template contains<DF3>()) temp.template get<DF3>() = any_cast<DF3>(i->second); + else if (i->second.template contains<DF4>()) temp.template get<DF4>() = any_cast<DF4>(i->second); + else if (i->second.template contains<DF5>()) temp.template get<DF5>() = any_cast<DF5>(i->second); + else if (i->second.template contains<DF6>()) temp.template get<DF6>() = any_cast<DF6>(i->second); + else if (i->second.template contains<DF7>()) temp.template get<DF7>() = any_cast<DF7>(i->second); + else if (i->second.template contains<DF8>()) temp.template get<DF8>() = any_cast<DF8>(i->second); + else if (i->second.template contains<DF9>()) temp.template get<DF9>() = any_cast<DF9>(i->second); + else if (i->second.template contains<DF10>()) temp.template get<DF10>() = any_cast<DF10>(i->second); + else throw serialization_error("Can't serialize one_vs_one_decision_function. 
Not all decision functions defined."); + + serialize(temp,out); + } + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing an object of type one_vs_one_decision_function"); + } + + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename sample_type, typename scalar_type> + struct copy_to_df_helper + { + copy_to_df_helper(any_decision_function<sample_type, scalar_type>& target_) : target(target_) {} + + any_decision_function<sample_type, scalar_type>& target; + + template <typename T> + void operator() ( + const T& item + ) const + { + target = item; + } + }; + } + + template < + typename T, + typename DF1, typename DF2, typename DF3, + typename DF4, typename DF5, typename DF6, + typename DF7, typename DF8, typename DF9, + typename DF10 + > + void deserialize( + one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, + std::istream& in + ) + { + try + { + type_safe_union<DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10> temp; + typedef typename T::label_type result_type; + typedef typename T::sample_type sample_type; + typedef typename T::scalar_type scalar_type; + typedef impl::copy_to_df_helper<sample_type, scalar_type> copy_to; + + unsigned long version; + deserialize(version, in); + + if (version != 1) + throw serialization_error("Can't deserialize one_vs_one_decision_function. 
Wrong version."); + + unsigned long size; + deserialize(size, in); + + typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; + binary_function_table dfs; + + unordered_pair<result_type> p; + for (unsigned long i = 0; i < size; ++i) + { + deserialize(p, in); + deserialize(temp, in); + if (temp.template contains<null_df>()) + throw serialization_error("A sub decision function of unknown type was encountered."); + + temp.apply_to_contents(copy_to(dfs[p])); + } + + item = one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>(dfs); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing an object of type one_vs_one_decision_function"); + } + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ONE_VS_ONE_DECISION_FUnCTION_Hh_ + + diff --git a/ml/dlib/dlib/svm/one_vs_one_decision_function_abstract.h b/ml/dlib/dlib/svm/one_vs_one_decision_function_abstract.h new file mode 100644 index 000000000..cf22e0ba7 --- /dev/null +++ b/ml/dlib/dlib/svm/one_vs_one_decision_function_abstract.h @@ -0,0 +1,213 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_ONE_VS_ONE_DECISION_FUnCTION_ABSTRACT_Hh_ +#ifdef DLIB_ONE_VS_ONE_DECISION_FUnCTION_ABSTRACT_Hh_ + + +#include "../serialize.h" +#include <map> +#include "../any/any_decision_function_abstract.h" +#include "../unordered_pair.h" +#include "one_vs_one_trainer_abstract.h" +#include "null_df.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename one_vs_one_trainer, + typename DF1 = null_df, typename DF2 = null_df, typename DF3 = null_df, + typename DF4 = null_df, typename DF5 = null_df, typename DF6 = null_df, + typename DF7 = null_df, typename DF8 = null_df, typename DF9 = null_df, + typename DF10 = null_df + > + class one_vs_one_decision_function + { + /*! + REQUIREMENTS ON one_vs_one_trainer + This should be an instantiation of the one_vs_one_trainer template. + It is used to infer which types are used for various things, such as + representing labels. + + REQUIREMENTS ON DF* + These types can either be left at their default values or set + to any kind of decision function object capable of being + stored in an any_decision_function<sample_type,scalar_type> + object. These types should also be serializable. + + WHAT THIS OBJECT REPRESENTS + This object represents a multiclass classifier built out + of a set of binary classifiers. Each binary classifier + is used to vote for the correct multiclass label using a + one vs. one strategy. Therefore, if you have N classes then + there will be N*(N-1)/2 binary classifiers inside this object. + + Note that the DF* template arguments are only used if you want + to serialize and deserialize one_vs_one_decision_function objects. + Specifically, all the types of binary decision function contained + within a one_vs_one_decision_function must be listed in the + template arguments if serialization and deserialization is to + be used. 
+ + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const members of this object from multiple + threads so long as all the decision functions contained in this object + are also threadsafe. This is because the const members are purely + read-only operations. However, any operation that modifies a + one_vs_one_decision_function is not threadsafe. + !*/ + public: + + typedef typename one_vs_one_trainer::label_type result_type; + typedef typename one_vs_one_trainer::sample_type sample_type; + typedef typename one_vs_one_trainer::scalar_type scalar_type; + typedef typename one_vs_one_trainer::mem_manager_type mem_manager_type; + + typedef std::map<unordered_pair<result_type>, any_decision_function<sample_type, scalar_type> > binary_function_table; + + one_vs_one_decision_function( + ); + /*! + ensures + - #number_of_classes() == 0 + - #get_binary_decision_functions().size() == 0 + - #get_labels().size() == 0 + !*/ + + explicit one_vs_one_decision_function( + const binary_function_table& decision_functions + ); + /*! + requires + - find_missing_pairs(decision_functions).size() == 0 + (i.e. all pairs of labels have an associated decision function) + ensures + - #get_binary_decision_functions() == decision_functions + - #get_labels() == a list of all the labels which appear in the + given set of decision functions + - #number_of_classes() == #get_labels().size() + !*/ + + template < + typename df1, typename df2, typename df3, typename df4, typename df5, + typename df6, typename df7, typename df8, typename df9, typename df10 + > + one_vs_one_decision_function ( + const one_vs_one_decision_function<one_vs_one_trainer, + df1, df2, df3, df4, df5, + df6, df7, df8, df9, df10>& item + ); + /*! 
+            ensures
+                - #*this will be a copy of item
+                - #number_of_classes() == item.number_of_classes()
+                - #get_labels() == item.get_labels()
+                - #get_binary_decision_functions() == item.get_binary_decision_functions()
+        !*/
+
+        const binary_function_table& get_binary_decision_functions (
+        ) const;
+        /*!
+            ensures
+                - returns the table of binary decision functions used by this
+                  object. The correspondence between binary decision functions
+                  and multiclass labels is the following:
+                    - for each element i of get_binary_decision_functions()
+                        - i->first == the label pair associated with binary decision
+                          function i->second.
+                        - if (decision function i->second outputs a value > 0) then
+                            - i->second is indicating that a test sample should
+                              receive a label of i->first.first
+                        - else
+                            - i->second is indicating that a test sample should
+                              receive a label of i->first.second
+        !*/
+
+        const std::vector<result_type> get_labels (
+        ) const;
+        /*!
+            ensures
+                - returns a vector containing all the labels which can be
+                  predicted by this object.
+        !*/
+
+        unsigned long number_of_classes (
+        ) const;
+        /*!
+            ensures
+                - returns get_labels().size()
+                  (i.e. returns the number of different labels/classes predicted by
+                  this object)
+        !*/
+
+        result_type operator() (
+            const sample_type& sample
+        ) const;
+        /*!
+            requires
+                - number_of_classes() != 0
+            ensures
+                - evaluates all the decision functions in get_binary_decision_functions()
+                  and returns the label which received the most votes.
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename T,
+        typename DF1, typename DF2, typename DF3,
+        typename DF4, typename DF5, typename DF6,
+        typename DF7, typename DF8, typename DF9,
+        typename DF10
+        >
+    void serialize(
+        const one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item,
+        std::ostream& out
+    );
+    /*!
+        ensures
+            - writes the given item to the output stream out.
+ throws + - serialization_error. + This is thrown if there is a problem writing to the ostream or if item + contains a type of decision function not listed among the DF* template + arguments. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename DF1, typename DF2, typename DF3, + typename DF4, typename DF5, typename DF6, + typename DF7, typename DF8, typename DF9, + typename DF10 + > + void deserialize( + one_vs_one_decision_function<T,DF1,DF2,DF3,DF4,DF5,DF6,DF7,DF8,DF9,DF10>& item, + std::istream& in + ); + /*! + ensures + - deserializes a one_vs_one_decision_function from in and stores it in item. + throws + - serialization_error. + This is thrown if there is a problem reading from the istream or if the + serialized data contains decision functions not listed among the DF* + template arguments. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ONE_VS_ONE_DECISION_FUnCTION_ABSTRACT_Hh_ + + + diff --git a/ml/dlib/dlib/svm/one_vs_one_trainer.h b/ml/dlib/dlib/svm/one_vs_one_trainer.h new file mode 100644 index 000000000..2beec8f67 --- /dev/null +++ b/ml/dlib/dlib/svm/one_vs_one_trainer.h @@ -0,0 +1,249 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_ONE_VS_ONE_TRAiNER_Hh_ +#define DLIB_ONE_VS_ONE_TRAiNER_Hh_ + +#include "one_vs_one_trainer_abstract.h" + +#include "one_vs_one_decision_function.h" +#include <vector> + +#include "../unordered_pair.h" +#include "multiclass_tools.h" + +#include <sstream> +#include <iostream> + +#include "../any.h" +#include <map> +#include <set> +#include "../threads.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename any_trainer, + typename label_type_ = double + > + class one_vs_one_trainer + { + public: + typedef label_type_ label_type; + + typedef typename any_trainer::sample_type sample_type; + typedef typename any_trainer::scalar_type scalar_type; + typedef typename any_trainer::mem_manager_type mem_manager_type; + + typedef one_vs_one_decision_function<one_vs_one_trainer> trained_function_type; + + one_vs_one_trainer ( + ) : + verbose(false), + num_threads(4) + {} + + void set_trainer ( + const any_trainer& trainer + ) + { + default_trainer = trainer; + trainers.clear(); + } + + void set_trainer ( + const any_trainer& trainer, + const label_type& l1, + const label_type& l2 + ) + { + trainers[make_unordered_pair(l1,l2)] = trainer; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + struct invalid_label : public dlib::error + { + invalid_label(const std::string& msg, const label_type& l1_, const label_type& l2_ + ) : dlib::error(msg), l1(l1_), l2(l2_) {}; + + virtual ~invalid_label( + ) throw() {} + + label_type l1, l2; + }; + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels + ) const + { + // make sure requires clause is not broken + 
DLIB_ASSERT(is_learning_problem(all_samples,all_labels), + "\t trained_function_type one_vs_one_trainer::train(all_samples,all_labels)" + << "\n\t invalid inputs were given to this function" + << "\n\t all_samples.size(): " << all_samples.size() + << "\n\t all_labels.size(): " << all_labels.size() + ); + + const std::vector<label_type> distinct_labels = select_all_distinct_labels(all_labels); + + + // fill pairs with all the pairs of labels. + std::vector<unordered_pair<label_type> > pairs; + for (unsigned long i = 0; i < distinct_labels.size(); ++i) + { + for (unsigned long j = i+1; j < distinct_labels.size(); ++j) + { + pairs.push_back(unordered_pair<label_type>(distinct_labels[i], distinct_labels[j])); + + // make sure we have a trainer for this pair + const typename binary_function_table::const_iterator itr = trainers.find(pairs.back()); + if (itr == trainers.end() && default_trainer.is_empty()) + { + std::ostringstream sout; + sout << "In one_vs_one_trainer, no trainer registered for the (" + << pairs.back().first << ", " << pairs.back().second << ") label pair."; + throw invalid_label(sout.str(), pairs.back().first, pairs.back().second); + } + } + } + + + + // Now train on all the label pairs. + parallel_for_helper helper(all_samples,all_labels,default_trainer,trainers,verbose,pairs); + parallel_for(num_threads, 0, pairs.size(), helper, 500); + + if (helper.error_message.size() != 0) + { + throw dlib::error("binary trainer threw while training one vs. one classifier. 
Error was: " + helper.error_message); + } + return trained_function_type(helper.dfs); + } + + private: + + typedef std::map<unordered_pair<label_type>, any_trainer> binary_function_table; + + struct parallel_for_helper + { + parallel_for_helper( + const std::vector<sample_type>& all_samples_, + const std::vector<label_type>& all_labels_, + const any_trainer& default_trainer_, + const binary_function_table& trainers_, + const bool verbose_, + const std::vector<unordered_pair<label_type> >& pairs_ + ) : + all_samples(all_samples_), + all_labels(all_labels_), + default_trainer(default_trainer_), + trainers(trainers_), + verbose(verbose_), + pairs(pairs_) + {} + + void operator()(long i) const + { + try + { + std::vector<sample_type> samples; + std::vector<scalar_type> labels; + + const unordered_pair<label_type> p = pairs[i]; + + // pick out the samples corresponding to these two classes + for (unsigned long k = 0; k < all_samples.size(); ++k) + { + if (all_labels[k] == p.first) + { + samples.push_back(all_samples[k]); + labels.push_back(+1); + } + else if (all_labels[k] == p.second) + { + samples.push_back(all_samples[k]); + labels.push_back(-1); + } + } + + if (verbose) + { + auto_mutex lock(class_mutex); + std::cout << "Training classifier for " << p.first << " vs. 
" << p.second << std::endl; + } + + any_trainer trainer; + // now train a binary classifier using the samples we selected + { auto_mutex lock(class_mutex); + const typename binary_function_table::const_iterator itr = trainers.find(p); + if (itr != trainers.end()) + trainer = itr->second; + else + trainer = default_trainer; + } + + any_decision_function<sample_type,scalar_type> binary_df = trainer.train(samples, labels); + + auto_mutex lock(class_mutex); + dfs[p] = binary_df; + } + catch (std::exception& e) + { + auto_mutex lock(class_mutex); + error_message = e.what(); + } + } + + mutable typename trained_function_type::binary_function_table dfs; + mutex class_mutex; + mutable std::string error_message; + + const std::vector<sample_type>& all_samples; + const std::vector<label_type>& all_labels; + const any_trainer& default_trainer; + const binary_function_table& trainers; + const bool verbose; + const std::vector<unordered_pair<label_type> >& pairs; + }; + + + any_trainer default_trainer; + binary_function_table trainers; + bool verbose; + unsigned long num_threads; + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ONE_VS_ONE_TRAiNER_Hh_ + diff --git a/ml/dlib/dlib/svm/one_vs_one_trainer_abstract.h b/ml/dlib/dlib/svm/one_vs_one_trainer_abstract.h new file mode 100644 index 000000000..42ba35815 --- /dev/null +++ b/ml/dlib/dlib/svm/one_vs_one_trainer_abstract.h @@ -0,0 +1,166 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_ONE_VS_ONE_TRAiNER_ABSTRACT_Hh_
+#ifdef DLIB_ONE_VS_ONE_TRAiNER_ABSTRACT_Hh_
+
+
+#include "one_vs_one_decision_function_abstract.h"
+#include <vector>
+
+#include "../any/any_trainer_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename any_trainer,
+        typename label_type_ = double
+        >
+    class one_vs_one_trainer
+    {
+        /*!
+            REQUIREMENTS ON any_trainer
+                must be an instantiation of the dlib::any_trainer template.
+
+            REQUIREMENTS ON label_type_
+                label_type_ must be default constructible, copyable, and comparable using
+                operator < and ==. It must also be possible to write it to an std::ostream
+                using operator<<.
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a tool for turning a bunch of binary classifiers
+                into a multiclass classifier. That is, if you have N possible
+                classes then it trains N*(N-1)/2 binary classifiers which are then used
+                to vote on the identity of a test sample.
+
+                This object works with any kind of binary classification trainer object
+                capable of being assigned to an any_trainer object. (e.g. the svm_nu_trainer)
+        !*/
+
+    public:
+
+
+        typedef label_type_ label_type;
+
+        typedef typename any_trainer::sample_type sample_type;
+        typedef typename any_trainer::scalar_type scalar_type;
+        typedef typename any_trainer::mem_manager_type mem_manager_type;
+
+        typedef one_vs_one_decision_function<one_vs_one_trainer> trained_function_type;
+
+        one_vs_one_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized
+                - This object will not be verbose unless be_verbose() is called.
+                - No binary trainers are associated with *this. I.e. you have to
+                  call set_trainer() before calling train().
+                - #get_num_threads() == 4
+        !*/
+
+        void set_trainer (
+            const any_trainer& trainer
+        );
+        /*!
+            ensures
+                - sets the trainer used for all pairs of training.
Any previous + calls to set_trainer() are overridden by this function. Even the + more specific set_trainer(trainer, l1, l2) form. + !*/ + + void set_trainer ( + const any_trainer& trainer, + const label_type& l1, + const label_type& l2 + ); + /*! + requires + - l1 != l2 + ensures + - Sets the trainer object used to create a binary classifier to + distinguish l1 labeled samples from l2 labeled samples. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + struct invalid_label : public dlib::error + { + /*! + This is the exception thrown by the train() function below. + !*/ + label_type l1, l2; + }; + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels + ) const; + /*! + requires + - is_learning_problem(all_samples, all_labels) + ensures + - trains a bunch of binary classifiers in a one vs one fashion to solve the given + multiclass classification problem. + - returns a one_vs_one_decision_function F with the following properties: + - F contains all the learned binary classifiers and can be used to predict + the labels of new samples. 
+ - if (new_x is a sample predicted to have a label of L) then + - F(new_x) == L + - F.get_labels() == select_all_distinct_labels(all_labels) + - F.number_of_classes() == select_all_distinct_labels(all_labels).size() + throws + - invalid_label + This exception is thrown if there are labels in all_labels which don't have + any corresponding trainer object. This will never happen if set_trainer(trainer) + has been called. However, if only the set_trainer(trainer,l1,l2) form has been + used then this exception is thrown if not all necessary label pairs have been + given a trainer. + + invalid_label::l1 and invalid_label::l2 will contain the label pair which is + missing a trainer object. Additionally, the exception will contain an + informative error message available via invalid_label::what(). + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ONE_VS_ONE_TRAiNER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/pegasos.h b/ml/dlib/dlib/svm/pegasos.h new file mode 100644 index 000000000..c28093fe0 --- /dev/null +++ b/ml/dlib/dlib/svm/pegasos.h @@ -0,0 +1,710 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_PEGASoS_ +#define DLIB_PEGASoS_ + +#include "pegasos_abstract.h" +#include <cmath> +#include "../algs.h" +#include "function.h" +#include "kernel.h" +#include "kcentroid.h" +#include <iostream> +#include <memory> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_pegasos + { + typedef kcentroid<offset_kernel<K> > kc_type; + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + template <typename K_> + struct rebind { + typedef svm_pegasos<K_> other; + }; + + svm_pegasos ( + ) : + max_sv(40), + lambda_c1(0.0001), + lambda_c2(0.0001), + tau(0.01), + tolerance(0.01), + train_count(0), + w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false) + { + } + + svm_pegasos ( + const kernel_type& kernel_, + const scalar_type& lambda_, + const scalar_type& tolerance_, + unsigned long max_num_sv + ) : + max_sv(max_num_sv), + kernel(kernel_), + lambda_c1(lambda_), + lambda_c2(lambda_), + tau(0.01), + tolerance(tolerance_), + train_count(0), + w(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false) + { + // make sure requires clause is not broken + DLIB_ASSERT(lambda_ > 0 && tolerance > 0 && max_num_sv > 0, + "\tsvm_pegasos::svm_pegasos(kernel,lambda,tolerance)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + << "\n\t max_num_sv: " << max_num_sv + ); + } + + void clear ( + ) + { + // reset the w vector back to its initial state + w = kc_type(offset_kernel<kernel_type>(kernel,tau),tolerance, max_sv, false); + train_count = 0; + } + + void set_kernel ( + kernel_type k + ) + { + kernel = k; + clear(); + } + + void set_max_num_sv ( + unsigned long max_num_sv + ) + { + // make 
sure requires clause is not broken + DLIB_ASSERT(max_num_sv > 0, + "\tvoid svm_pegasos::set_max_num_sv(max_num_sv)" + << "\n\t invalid inputs were given to this function" + << "\n\t max_num_sv: " << max_num_sv + ); + max_sv = max_num_sv; + clear(); + } + + unsigned long get_max_num_sv ( + ) const + { + return max_sv; + } + + void set_tolerance ( + double tol + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < tol, + "\tvoid svm_pegasos::set_tolerance(tol)" + << "\n\t invalid inputs were given to this function" + << "\n\t tol: " << tol + ); + tolerance = tol; + clear(); + } + + void set_lambda ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < lambda_, + "\tvoid svm_pegasos::set_lambda(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda_c1 = lambda_; + lambda_c2 = lambda_; + + max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); + clear(); + } + + void set_lambda_class1 ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < lambda_, + "\tvoid svm_pegasos::set_lambda_class1(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda_c1 = lambda_; + max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); + clear(); + } + + void set_lambda_class2 ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < lambda_, + "\tvoid svm_pegasos::set_lambda_class2(lambda_)" + << "\n\t invalid inputs were given to this function" + << "\n\t lambda_: " << lambda_ + ); + lambda_c2 = lambda_; + max_wnorm = 1/std::sqrt(std::min(lambda_c1, lambda_c2)); + clear(); + } + + const scalar_type get_lambda_class1 ( + ) const + { + return lambda_c1; + } + + const scalar_type get_lambda_class2 ( + ) const + { + return lambda_c2; + } + + const scalar_type get_tolerance ( + ) const + { + return tolerance; + } + + const kernel_type get_kernel ( + 
) const + { + return kernel; + } + + unsigned long get_train_count ( + ) const + { + return static_cast<unsigned long>(train_count); + } + + scalar_type train ( + const sample_type& x, + const scalar_type& y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(y == -1 || y == 1, + "\tscalar_type svm_pegasos::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t y: " << y + ); + + const double lambda = (y==+1)? lambda_c1 : lambda_c2; + + ++train_count; + const scalar_type learning_rate = 1/(lambda*train_count); + + // if this sample point is within the margin of the current hyperplane + if (y*w.inner_product(x) < 1) + { + + // compute: w = (1-learning_rate*lambda)*w + y*learning_rate*x + w.train(x, 1 - learning_rate*lambda, y*learning_rate); + + scalar_type wnorm = std::sqrt(w.squared_norm()); + scalar_type temp = max_wnorm/wnorm; + if (temp < 1) + w.scale_by(temp); + } + else + { + w.scale_by(1 - learning_rate*lambda); + } + + // return the current learning rate + return 1/(std::min(lambda_c1,lambda_c2)*train_count); + } + + scalar_type operator() ( + const sample_type& x + ) const + { + return w.inner_product(x); + } + + const decision_function<kernel_type> get_decision_function ( + ) const + { + distance_function<offset_kernel<kernel_type> > df = w.get_distance_function(); + return decision_function<kernel_type>(df.get_alpha(), -tau*sum(df.get_alpha()), kernel, df.get_basis_vectors()); + } + + void swap ( + svm_pegasos& item + ) + { + exchange(max_sv, item.max_sv); + exchange(kernel, item.kernel); + exchange(lambda_c1, item.lambda_c1); + exchange(lambda_c2, item.lambda_c2); + exchange(max_wnorm, item.max_wnorm); + exchange(tau, item.tau); + exchange(tolerance, item.tolerance); + exchange(train_count, item.train_count); + exchange(w, item.w); + } + + friend void serialize(const svm_pegasos& item, std::ostream& out) + { + serialize(item.max_sv, out); + serialize(item.kernel, out); + serialize(item.lambda_c1, out); + 
serialize(item.lambda_c2, out); + serialize(item.max_wnorm, out); + serialize(item.tau, out); + serialize(item.tolerance, out); + serialize(item.train_count, out); + serialize(item.w, out); + } + + friend void deserialize(svm_pegasos& item, std::istream& in) + { + deserialize(item.max_sv, in); + deserialize(item.kernel, in); + deserialize(item.lambda_c1, in); + deserialize(item.lambda_c2, in); + deserialize(item.max_wnorm, in); + deserialize(item.tau, in); + deserialize(item.tolerance, in); + deserialize(item.train_count, in); + deserialize(item.w, in); + } + + private: + + unsigned long max_sv; + kernel_type kernel; + scalar_type lambda_c1; + scalar_type lambda_c2; + scalar_type max_wnorm; + scalar_type tau; + scalar_type tolerance; + scalar_type train_count; + kc_type w; + + }; // end of class svm_pegasos + + template < + typename K + > + void swap ( + svm_pegasos<K>& a, + svm_pegasos<K>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + void replicate_settings ( + const svm_pegasos<T>& source, + svm_pegasos<U>& dest + ) + { + dest.set_tolerance(source.get_tolerance()); + dest.set_lambda_class1(source.get_lambda_class1()); + dest.set_lambda_class2(source.get_lambda_class2()); + dest.set_max_num_sv(source.get_max_num_sv()); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class batch_trainer + { + + // ------------------------------------------------------------------------------------ + + template < + typename K, + typename sample_vector_type + > + class caching_kernel + { + public: + typedef typename K::scalar_type scalar_type; + typedef long sample_type; + //typedef 
typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + caching_kernel () {} + + caching_kernel ( + const K& kern, + const sample_vector_type& samps, + long cache_size_ + ) : real_kernel(kern), samples(&samps), counter(0) + { + cache_size = std::min<long>(cache_size_, samps.size()); + + cache.reset(new cache_type); + cache->frequency_of_use.resize(samps.size()); + for (long i = 0; i < samps.size(); ++i) + cache->frequency_of_use[i] = std::make_pair(0, i); + + // Set the cache build/rebuild threshold so that we have to have + // as many cache misses as there are entries in the cache before + // we build/rebuild. + counter_threshold = samps.size()*cache_size; + cache->sample_location.assign(samples->size(), -1); + } + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + // rebuild the cache every so often + if (counter > counter_threshold ) + { + build_cache(); + } + + const long a_loc = cache->sample_location[a]; + const long b_loc = cache->sample_location[b]; + + cache->frequency_of_use[a].first += 1; + cache->frequency_of_use[b].first += 1; + + if (a_loc != -1) + { + return cache->kernel(a_loc, b); + } + else if (b_loc != -1) + { + return cache->kernel(b_loc, a); + } + else + { + ++counter; + return real_kernel((*samples)(a), (*samples)(b)); + } + } + + bool operator== ( + const caching_kernel& item + ) const + { + return item.real_kernel == real_kernel && + item.samples == samples; + } + + private: + K real_kernel; + + void build_cache ( + ) const + { + std::sort(cache->frequency_of_use.rbegin(), cache->frequency_of_use.rend()); + counter = 0; + + + cache->kernel.set_size(cache_size, samples->size()); + cache->sample_location.assign(samples->size(), -1); + + // loop over all the samples in the cache + for (long i = 0; i < cache_size; ++i) + { + const long cur = cache->frequency_of_use[i].second; + cache->sample_location[cur] = i; + + // now populate all possible kernel products with 
the current sample + for (long j = 0; j < samples->size(); ++j) + { + cache->kernel(i, j) = real_kernel((*samples)(cur), (*samples)(j)); + } + + } + + // reset the frequency of use metrics + for (long i = 0; i < samples->size(); ++i) + cache->frequency_of_use[i] = std::make_pair(0, i); + } + + + struct cache_type + { + matrix<scalar_type> kernel; + + std::vector<long> sample_location; // where in the cache a sample is. -1 means not in cache + std::vector<std::pair<long,long> > frequency_of_use; + }; + + const sample_vector_type* samples = 0; + + std::shared_ptr<cache_type> cache; + mutable unsigned long counter = 0; + unsigned long counter_threshold = 0; + long cache_size = 0; + }; + + // ------------------------------------------------------------------------------------ + + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + + batch_trainer ( + ) : + min_learning_rate(0.1), + use_cache(false), + cache_size(100) + { + } + + batch_trainer ( + const trainer_type& trainer_, + const scalar_type min_learning_rate_, + bool verbose_, + bool use_cache_, + long cache_size_ = 100 + ) : + trainer(trainer_), + min_learning_rate(min_learning_rate_), + verbose(verbose_), + use_cache(use_cache_), + cache_size(cache_size_) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < min_learning_rate_ && + cache_size_ > 0, + "\tbatch_trainer::batch_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t min_learning_rate_: " << min_learning_rate_ + << "\n\t cache_size_: " << cache_size_ + ); + + trainer.clear(); + } + + const scalar_type get_min_learning_rate ( + ) const + { + return min_learning_rate; + } + + template < + typename in_sample_vector_type, + typename 
in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + if (use_cache) + return do_train_cached(mat(x), mat(y)); + else + return do_train(mat(x), mat(y)); + } + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + + dlib::rand rnd; + + trainer_type my_trainer(trainer); + + scalar_type cur_learning_rate = min_learning_rate + 10; + unsigned long count = 0; + + while (cur_learning_rate > min_learning_rate) + { + const long i = rnd.get_random_32bit_number()%x.size(); + // keep feeding the trainer data until its learning rate goes below our threshold + cur_learning_rate = my_trainer.train(x(i), y(i)); + + if (verbose) + { + if ( (count&0x7FF) == 0) + { + std::cout << "\rbatch_trainer(): Percent complete: " + << 100*min_learning_rate/cur_learning_rate << " " << std::flush; + } + ++count; + } + } + + if (verbose) + { + decision_function<kernel_type> df = my_trainer.get_decision_function(); + std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl; + std::cout << " Num sv: " << df.basis_vectors.size() << std::endl; + std::cout << " bias: " << df.b << std::endl; + return df; + } + else + { + return my_trainer.get_decision_function(); + } + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train_cached ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + + dlib::rand rnd; + + // make a caching kernel + typedef caching_kernel<kernel_type, in_sample_vector_type> ckernel_type; + ckernel_type ck(trainer.get_kernel(), x, cache_size); + + // now rebind the trainer to use the caching kernel + typedef typename trainer_type::template rebind<ckernel_type>::other rebound_trainer_type; + 
rebound_trainer_type my_trainer; + my_trainer.set_kernel(ck); + replicate_settings(trainer, my_trainer); + + scalar_type cur_learning_rate = min_learning_rate + 10; + unsigned long count = 0; + + while (cur_learning_rate > min_learning_rate) + { + const long i = rnd.get_random_32bit_number()%x.size(); + // keep feeding the trainer data until its learning rate goes below our threshold + cur_learning_rate = my_trainer.train(i, y(i)); + + if (verbose) + { + if ( (count&0x7FF) == 0) + { + std::cout << "\rbatch_trainer(): Percent complete: " + << 100*min_learning_rate/cur_learning_rate << " " << std::flush; + } + ++count; + } + } + + if (verbose) + { + decision_function<ckernel_type> cached_df; + cached_df = my_trainer.get_decision_function(); + + std::cout << "\rbatch_trainer(): Percent complete: 100 " << std::endl; + std::cout << " Num sv: " << cached_df.basis_vectors.size() << std::endl; + std::cout << " bias: " << cached_df.b << std::endl; + + return decision_function<kernel_type> ( + cached_df.alpha, + cached_df.b, + trainer.get_kernel(), + rowm(x, cached_df.basis_vectors) + ); + } + else + { + decision_function<ckernel_type> cached_df; + cached_df = my_trainer.get_decision_function(); + + return decision_function<kernel_type> ( + cached_df.alpha, + cached_df.b, + trainer.get_kernel(), + rowm(x, cached_df.basis_vectors) + ); + } + } + + trainer_type trainer; + scalar_type min_learning_rate; + bool verbose; + bool use_cache; + long cache_size; + + }; // end of class batch_trainer + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> batch ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); } + +// ---------------------------------------------------------------------------------------- + + template < + typename 
trainer_type + > + const batch_trainer<trainer_type> verbose_batch ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> batch_cached ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1, + long cache_size = 100 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> verbose_batch_cached ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1, + long cache_size = 100 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_PEGASoS_ + diff --git a/ml/dlib/dlib/svm/pegasos_abstract.h b/ml/dlib/dlib/svm/pegasos_abstract.h new file mode 100644 index 000000000..008b1cb94 --- /dev/null +++ b/ml/dlib/dlib/svm/pegasos_abstract.h @@ -0,0 +1,514 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_PEGASoS_ABSTRACT_ +#ifdef DLIB_PEGASoS_ABSTRACT_ + +#include <cmath> +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kern_type + > + class svm_pegasos + { + /*! 
+ REQUIREMENTS ON kern_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements an online algorithm for training a support + vector machine for solving binary classification problems. + + The implementation of the Pegasos algorithm used by this object is based + on the following excellent paper: + Pegasos: Primal estimated sub-gradient solver for SVM (2007) + by Shai Shalev-Shwartz, Yoram Singer, Nathan Srebro + In ICML + + This SVM training algorithm has two interesting properties. First, the + pegasos algorithm itself converges to the solution in an amount of time + unrelated to the size of the training set (in addition to being quite fast + to begin with). This makes it an appropriate algorithm for learning from + very large datasets. Second, this object uses the dlib::kcentroid object + to maintain a sparse approximation of the learned decision function. + This means that the number of support vectors in the resulting decision + function is also unrelated to the size of the dataset (in normal SVM + training algorithms, the number of support vectors grows approximately + linearly with the size of the training set). + !*/ + + public: + typedef kern_type kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + template <typename K_> + struct rebind { + typedef svm_pegasos<K_> other; + }; + + svm_pegasos ( + ); + /*! + ensures + - this object is properly initialized + - #get_lambda_class1() == 0.0001 + - #get_lambda_class2() == 0.0001 + - #get_tolerance() == 0.01 + - #get_train_count() == 0 + - #get_max_num_sv() == 40 + !*/ + + svm_pegasos ( + const kernel_type& kernel_, + const scalar_type& lambda_, + const scalar_type& tolerance_, + unsigned long max_num_sv + ); + /*! 
+ requires + - lambda_ > 0 + - tolerance_ > 0 + - max_num_sv > 0 + ensures + - this object is properly initialized + - #get_lambda_class1() == lambda_ + - #get_lambda_class2() == lambda_ + - #get_tolerance() == tolerance_ + - #get_kernel() == kernel_ + - #get_train_count() == 0 + - #get_max_num_sv() == max_num_sv + !*/ + + void clear ( + ); + /*! + ensures + - #get_train_count() == 0 + - clears out any memory of previous calls to train() + - doesn't change any of the algorithm parameters. I.e. + - #get_lambda_class1() == get_lambda_class1() + - #get_lambda_class2() == get_lambda_class2() + - #get_tolerance() == get_tolerance() + - #get_kernel() == get_kernel() + - #get_max_num_sv() == get_max_num_sv() + !*/ + + const scalar_type get_lambda_class1 ( + ) const; + /*! + ensures + - returns the SVM regularization term for the +1 class. It is the + parameter that determines the trade off between trying to fit the + +1 training data exactly or allowing more errors but hopefully + improving the generalization ability of the resulting classifier. + Smaller values encourage exact fitting while larger values may + encourage better generalization. It is also worth noting that the + number of iterations it takes for this algorithm to converge is + proportional to 1/lambda. So smaller values of this term cause + the running time of this algorithm to increase. For more + information you should consult the paper referenced above. + !*/ + + const scalar_type get_lambda_class2 ( + ) const; + /*! + ensures + - returns the SVM regularization term for the -1 class. It has + the same properties as the get_lambda_class1() parameter except that + it applies to the -1 class. + !*/ + + const scalar_type get_tolerance ( + ) const; + /*! + ensures + - returns the tolerance used by the internal kcentroid object to + represent the learned decision function. 
Smaller values of this + tolerance will result in a more accurate representation of the + decision function but will use more support vectors (up to + a max of get_max_num_sv()). + !*/ + + unsigned long get_max_num_sv ( + ) const; + /*! + ensures + - returns the maximum number of support vectors this object is + allowed to use. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns the kernel used by this object + !*/ + + void set_kernel ( + kernel_type k + ); + /*! + ensures + - #get_kernel() == k + - #get_train_count() == 0 + (i.e. clears any memory of previous training) + !*/ + + void set_tolerance ( + double tol + ); + /*! + requires + - tol > 0 + ensures + - #get_tolerance() == tol + - #get_train_count() == 0 + (i.e. clears any memory of previous training) + !*/ + + void set_max_num_sv ( + unsigned long max_num_sv + ); + /*! + requires + - max_num_sv > 0 + ensures + - #get_max_num_sv() == max_num_sv + - #get_train_count() == 0 + (i.e. clears any memory of previous training) + !*/ + + void set_lambda ( + scalar_type lambda_ + ); + /*! + requires + - lambda_ > 0 + ensures + - #get_lambda_class1() == lambda_ + - #get_lambda_class2() == lambda_ + - #get_train_count() == 0 + (i.e. clears any memory of previous training) + !*/ + + void set_lambda_class1 ( + scalar_type lambda_ + ); + /*! + requires + - lambda_ > 0 + ensures + - #get_lambda_class1() == lambda_ + #get_train_count() == 0 + (i.e. clears any memory of previous training) + !*/ + + void set_lambda_class2 ( + scalar_type lambda_ + ); + /*! + requires + - lambda_ > 0 + ensures + - #get_lambda_class2() == lambda_ + #get_train_count() == 0 + (i.e. clears any memory of previous training) + !*/ + + unsigned long get_train_count ( + ) const; + /*! + ensures + - returns how many times this->train() has been called + since this object was constructed or last cleared. + !*/ + + scalar_type train ( + const sample_type& x, + const scalar_type& y + ); + /*! 
+ requires + - y == 1 || y == -1 + ensures + - trains this svm using the given sample x and label y + - #get_train_count() == get_train_count() + 1 + - returns the current learning rate + (i.e. 1/(get_train_count()*min(get_lambda_class1(),get_lambda_class2())) ) + !*/ + + scalar_type operator() ( + const sample_type& x + ) const; + /*! + ensures + - classifies the given x sample using the decision function + this object has learned so far. + - if (x is a sample predicted have +1 label) then + - returns a number >= 0 + - else + - returns a number < 0 + !*/ + + const decision_function<kernel_type> get_decision_function ( + ) const; + /*! + ensures + - returns a decision function F that represents the function learned + by this object so far. I.e. it is the case that: + - for all x: F(x) == (*this)(x) + !*/ + + void swap ( + svm_pegasos& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename kern_type + > + void swap( + svm_pegasos<kern_type>& a, + svm_pegasos<kern_type>& b + ) { a.swap(b); } + /*! + provides a global swap function + !*/ + + template < + typename kern_type + > + void serialize ( + const svm_pegasos<kern_type>& item, + std::ostream& out + ); + /*! + provides serialization support for svm_pegasos objects + !*/ + + template < + typename kern_type + > + void deserialize ( + svm_pegasos<kern_type>& item, + std::istream& in + ); + /*! + provides serialization support for svm_pegasos objects + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + void replicate_settings ( + const svm_pegasos<T>& source, + svm_pegasos<U>& dest + ); + /*! + ensures + - copies all the parameters from the source trainer to the dest trainer. 
+ - #dest.get_tolerance() == source.get_tolerance() + - #dest.get_lambda_class1() == source.get_lambda_class1() + - #dest.get_lambda_class2() == source.get_lambda_class2() + - #dest.get_max_num_sv() == source.get_max_num_sv() + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class batch_trainer + { + /*! + REQUIREMENTS ON trainer_type + - trainer_type == some kind of online trainer object (e.g. svm_pegasos) + replicate_settings() must also be defined for the type. + + WHAT THIS OBJECT REPRESENTS + This is a trainer object that is meant to wrap online trainer objects + that create decision_functions. It turns an online learning algorithm + such as svm_pegasos into a batch learning object. This allows you to + use objects like svm_pegasos with functions (e.g. cross_validate_trainer) + that expect batch mode training objects. + !*/ + + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + + batch_trainer ( + ); + /*! + ensures + - This object is in an uninitialized state. You must + construct a real one with the other constructor and assign it + to this instance before you use this object. + !*/ + + batch_trainer ( + const trainer_type& online_trainer, + const scalar_type min_learning_rate_, + bool verbose_, + bool use_cache_, + long cache_size_ = 100 + ); + /*! 
+ requires + - min_learning_rate_ > 0 + - cache_size_ > 0 + ensures + - returns a batch trainer object that uses the given online_trainer object + to train a decision function. + - #get_min_learning_rate() == min_learning_rate_ + - if (verbose_ == true) then + - this object will output status messages to standard out while + training is under way. + - if (use_cache_ == true) then + - this object will cache up to cache_size_ columns of the kernel + matrix during the training process. + !*/ + + const scalar_type get_min_learning_rate ( + ) const; + /*! + ensures + - returns the min learning rate that the online trainer must reach + before this object considers training to be complete. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + ensures + - trains and returns a decision_function using the trainer that was + supplied to this object's constructor. + - training continues until the online training object indicates that + its learning rate has dropped below get_min_learning_rate(). + throws + - std::bad_alloc + - any exceptions thrown by the trainer_type object + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> batch ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, false); } + /*! + requires + - min_learning_rate > 0 + - trainer_type == some kind of online trainer object that creates decision_function + objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. + ensures + - returns a batch_trainer object that has been instantiated with the + given arguments. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> verbose_batch ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, false); } + /*! + requires + - min_learning_rate > 0 + - trainer_type == some kind of online trainer object that creates decision_function + objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. + ensures + - returns a batch_trainer object that has been instantiated with the + given arguments (and is verbose). + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> batch_cached ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1, + long cache_size = 100 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, false, true, cache_size); } + /*! + requires + - min_learning_rate > 0 + - cache_size > 0 + - trainer_type == some kind of online trainer object that creates decision_function + objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. + ensures + - returns a batch_trainer object that has been instantiated with the + given arguments (uses a kernel cache). + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const batch_trainer<trainer_type> verbose_batch_cached ( + const trainer_type& trainer, + const typename trainer_type::scalar_type min_learning_rate = 0.1, + long cache_size = 100 + ) { return batch_trainer<trainer_type>(trainer, min_learning_rate, true, true, cache_size); } + /*! 
+ requires + - min_learning_rate > 0 + - cache_size > 0 + - trainer_type == some kind of online trainer object that creates decision_function + objects (e.g. svm_pegasos). replicate_settings() must also be defined for the type. + ensures + - returns a batch_trainer object that has been instantiated with the + given arguments (is verbose and uses a kernel cache). + !*/ + +// ---------------------------------------------------------------------------------------- + + +} + +#endif // DLIB_PEGASoS_ABSTRACT_ + + diff --git a/ml/dlib/dlib/svm/ranking_tools.h b/ml/dlib/dlib/svm/ranking_tools.h new file mode 100644 index 000000000..3c77b41ae --- /dev/null +++ b/ml/dlib/dlib/svm/ranking_tools.h @@ -0,0 +1,448 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RANKING_ToOLS_Hh_ +#define DLIB_RANKING_ToOLS_Hh_ + +#include "ranking_tools_abstract.h" + +#include "../algs.h" +#include "../matrix.h" +#include <vector> +#include <utility> +#include <algorithm> +#include "sparse_vector.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct ranking_pair + { + ranking_pair() {} + + ranking_pair( + const std::vector<T>& r, + const std::vector<T>& nr + ) : + relevant(r), nonrelevant(nr) + {} + + std::vector<T> relevant; + std::vector<T> nonrelevant; + }; + + template < + typename T + > + void serialize ( + const ranking_pair<T>& item, + std::ostream& out + ) + { + int version = 1; + serialize(version, out); + serialize(item.relevant, out); + serialize(item.nonrelevant, out); + } + + + template < + typename T + > + void deserialize ( + ranking_pair<T>& item, + std::istream& in + ) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw dlib::serialization_error("Wrong version found while deserializing dlib::ranking_pair"); + + 
deserialize(item.relevant, in); + deserialize(item.nonrelevant, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + typename disable_if<is_matrix<T>,bool>::type is_ranking_problem ( + const std::vector<ranking_pair<T> >& samples + ) + { + if (samples.size() == 0) + return false; + + + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (samples[i].relevant.size() == 0) + return false; + if (samples[i].nonrelevant.size() == 0) + return false; + } + + return true; + } + + template < + typename T + > + typename enable_if<is_matrix<T>,bool>::type is_ranking_problem ( + const std::vector<ranking_pair<T> >& samples + ) + { + if (samples.size() == 0) + return false; + + + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (samples[i].relevant.size() == 0) + return false; + if (samples[i].nonrelevant.size() == 0) + return false; + } + + // If these are dense vectors then they must all have the same dimensionality. 
+ const long dims = max_index_plus_one(samples[0].relevant); + for (unsigned long i = 0; i < samples.size(); ++i) + { + for (unsigned long j = 0; j < samples[i].relevant.size(); ++j) + { + if (is_vector(samples[i].relevant[j]) == false) + return false; + + if (samples[i].relevant[j].size() != dims) + return false; + } + for (unsigned long j = 0; j < samples[i].nonrelevant.size(); ++j) + { + if (is_vector(samples[i].nonrelevant[j]) == false) + return false; + + if (samples[i].nonrelevant[j].size() != dims) + return false; + } + } + + return true; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + unsigned long max_index_plus_one ( + const ranking_pair<T>& item + ) + { + return std::max(max_index_plus_one(item.relevant), max_index_plus_one(item.nonrelevant)); + } + + template < + typename T + > + unsigned long max_index_plus_one ( + const std::vector<ranking_pair<T> >& samples + ) + { + unsigned long dims = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + { + dims = std::max(dims, max_index_plus_one(samples[i])); + } + return dims; + } + +// ---------------------------------------------------------------------------------------- + + template <typename T> + void count_ranking_inversions ( + const std::vector<T>& x, + const std::vector<T>& y, + std::vector<unsigned long>& x_count, + std::vector<unsigned long>& y_count + ) + { + x_count.assign(x.size(),0); + y_count.assign(y.size(),0); + + if (x.size() == 0 || y.size() == 0) + return; + + std::vector<std::pair<T,unsigned long> > xsort(x.size()); + std::vector<std::pair<T,unsigned long> > ysort(y.size()); + for (unsigned long i = 0; i < x.size(); ++i) + xsort[i] = std::make_pair(x[i], i); + for (unsigned long j = 0; j < y.size(); ++j) + ysort[j] = std::make_pair(y[j], j); + + std::sort(xsort.begin(), xsort.end()); + std::sort(ysort.begin(), ysort.end()); + + + unsigned long i, j; + + // Do the counting for the x values. 
+ for (i = 0, j = 0; i < x_count.size(); ++i) + { + // Skip past y values that are in the correct order with respect to xsort[i]. + while (j < ysort.size() && ysort[j].first < xsort[i].first) + ++j; + + x_count[xsort[i].second] = ysort.size() - j; + } + + + // Now do the counting for the y values. + for (i = 0, j = 0; j < y_count.size(); ++j) + { + // Skip past x values that are in the incorrect order with respect to ysort[j]. + while (i < xsort.size() && !(ysort[j].first < xsort[i].first)) + ++i; + + y_count[ysort[j].second] = i; + } + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + inline bool compare_first_reverse_second ( + const std::pair<double,bool>& a, + const std::pair<double,bool>& b + ) + { + if (a.first < b.first) + return true; + else if (a.first > b.first) + return false; + else if (a.second && !b.second) + return true; + else + return false; + } + } + + template < + typename ranking_function, + typename T + > + matrix<double,1,2> test_ranking_function ( + const ranking_function& funct, + const std::vector<ranking_pair<T> >& samples + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_ranking_problem(samples), + "\t double test_ranking_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples) + ); + + unsigned long total_pairs = 0; + unsigned long total_wrong = 0; + + std::vector<double> rel_scores; + std::vector<double> nonrel_scores; + std::vector<unsigned long> rel_counts; + std::vector<unsigned long> nonrel_counts; + + running_stats<double> rs; + std::vector<std::pair<double,bool> > total_scores; + std::vector<bool> total_ranking; + + for (unsigned long i = 0; i < samples.size(); ++i) + { + rel_scores.resize(samples[i].relevant.size()); + nonrel_scores.resize(samples[i].nonrelevant.size()); + total_scores.clear(); + + for (unsigned 
long k = 0; k < rel_scores.size(); ++k) + { + rel_scores[k] = funct(samples[i].relevant[k]); + total_scores.push_back(std::make_pair(rel_scores[k], true)); + } + + for (unsigned long k = 0; k < nonrel_scores.size(); ++k) + { + nonrel_scores[k] = funct(samples[i].nonrelevant[k]); + total_scores.push_back(std::make_pair(nonrel_scores[k], false)); + } + + // Now compute the average precision for this sample. We need to sort the + // results and then back them into total_ranking. Note that we sort them so + // that, if you get a block of ranking values that are all equal, the elements + // marked as true will come last. This prevents a ranking from outputting a + // constant value for everything and still getting a good MAP score. + std::sort(total_scores.rbegin(), total_scores.rend(), impl::compare_first_reverse_second); + total_ranking.clear(); + for (unsigned long i = 0; i < total_scores.size(); ++i) + total_ranking.push_back(total_scores[i].second); + rs.add(average_precision(total_ranking)); + + + count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts); + + total_pairs += rel_scores.size()*nonrel_scores.size(); + + // Note that we don't need to look at nonrel_counts since it is redundant with + // the information in rel_counts in this case. 
+ total_wrong += sum(mat(rel_counts)); + } + + const double rank_swaps = static_cast<double>(total_pairs - total_wrong) / total_pairs; + const double mean_average_precision = rs.mean(); + matrix<double,1,2> res; + res = rank_swaps, mean_average_precision; + return res; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename ranking_function, + typename T + > + matrix<double,1,2> test_ranking_function ( + const ranking_function& funct, + const ranking_pair<T>& sample + ) + { + return test_ranking_function(funct, std::vector<ranking_pair<T> >(1,sample)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename T + > + matrix<double,1,2> cross_validate_ranking_trainer ( + const trainer_type& trainer, + const std::vector<ranking_pair<T> >& samples, + const long folds + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_ranking_problem(samples) && + 1 < folds && folds <= static_cast<long>(samples.size()), + "\t double cross_validate_ranking_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t folds: " << folds + << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples) + ); + + + const long num_in_test = samples.size()/folds; + const long num_in_train = samples.size() - num_in_test; + + + std::vector<ranking_pair<T> > samples_test, samples_train; + + + long next_test_idx = 0; + + unsigned long total_pairs = 0; + unsigned long total_wrong = 0; + + std::vector<double> rel_scores; + std::vector<double> nonrel_scores; + std::vector<unsigned long> rel_counts; + std::vector<unsigned long> nonrel_counts; + + running_stats<double> rs; + std::vector<std::pair<double,bool> > total_scores; + std::vector<bool> total_ranking; + + for (long i = 0; i < folds; ++i) + { + samples_test.clear(); + samples_train.clear(); + + // 
load up the test samples + for (long cnt = 0; cnt < num_in_test; ++cnt) + { + samples_test.push_back(samples[next_test_idx]); + next_test_idx = (next_test_idx + 1)%samples.size(); + } + + // load up the training samples + long next = next_test_idx; + for (long cnt = 0; cnt < num_in_train; ++cnt) + { + samples_train.push_back(samples[next]); + next = (next + 1)%samples.size(); + } + + + const typename trainer_type::trained_function_type& df = trainer.train(samples_train); + + // check how good df is on the test data + for (unsigned long i = 0; i < samples_test.size(); ++i) + { + rel_scores.resize(samples_test[i].relevant.size()); + nonrel_scores.resize(samples_test[i].nonrelevant.size()); + + total_scores.clear(); + + for (unsigned long k = 0; k < rel_scores.size(); ++k) + { + rel_scores[k] = df(samples_test[i].relevant[k]); + total_scores.push_back(std::make_pair(rel_scores[k], true)); + } + + for (unsigned long k = 0; k < nonrel_scores.size(); ++k) + { + nonrel_scores[k] = df(samples_test[i].nonrelevant[k]); + total_scores.push_back(std::make_pair(nonrel_scores[k], false)); + } + + // Now compute the average precision for this sample. We need to sort the + // results and then back them into total_ranking. Note that we sort them so + // that, if you get a block of ranking values that are all equal, the elements + // marked as true will come last. This prevents a ranking from outputting a + // constant value for everything and still getting a good MAP score. 
+ std::sort(total_scores.rbegin(), total_scores.rend(), impl::compare_first_reverse_second); + total_ranking.clear(); + for (unsigned long i = 0; i < total_scores.size(); ++i) + total_ranking.push_back(total_scores[i].second); + rs.add(average_precision(total_ranking)); + + + count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts); + + total_pairs += rel_scores.size()*nonrel_scores.size(); + + // Note that we don't need to look at nonrel_counts since it is redundant with + // the information in rel_counts in this case. + total_wrong += sum(mat(rel_counts)); + } + + } // for (long i = 0; i < folds; ++i) + + const double rank_swaps = static_cast<double>(total_pairs - total_wrong) / total_pairs; + const double mean_average_precision = rs.mean(); + matrix<double,1,2> res; + res = rank_swaps, mean_average_precision; + return res; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANKING_ToOLS_Hh_ + diff --git a/ml/dlib/dlib/svm/ranking_tools_abstract.h b/ml/dlib/dlib/svm/ranking_tools_abstract.h new file mode 100644 index 000000000..af6c7a2e3 --- /dev/null +++ b/ml/dlib/dlib/svm/ranking_tools_abstract.h @@ -0,0 +1,247 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_RANKING_ToOLS_ABSTRACT_Hh_ +#ifdef DLIB_RANKING_ToOLS_ABSTRACT_Hh_ + + +#include "../algs.h" +#include "../matrix.h" +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct ranking_pair + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is used to contain a ranking example. In particular, we say + that a good ranking of T objects is one in which all the elements in + this->relevant are ranked higher than the elements of this->nonrelevant. 
+ Therefore, ranking_pair objects are used to represent training examples for + learning-to-rank tasks. + !*/ + + ranking_pair() {} + /*! + ensures + - #relevant.size() == 0 + - #nonrelevant.size() == 0 + !*/ + + ranking_pair( + const std::vector<T>& r, + const std::vector<T>& nr + ) : relevant(r), nonrelevant(nr) {} + /*! + ensures + - #relevant == r + - #nonrelevant == nr + !*/ + + std::vector<T> relevant; + std::vector<T> nonrelevant; + }; + + template < + typename T + > + void serialize ( + const ranking_pair<T>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + template < + typename T + > + void deserialize ( + ranking_pair<T>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + bool is_ranking_problem ( + const std::vector<ranking_pair<T> >& samples + ); + /*! + ensures + - returns true if the data in samples represents a valid learning-to-rank + learning problem. That is, this function returns true if all of the + following are true and false otherwise: + - samples.size() > 0 + - for all valid i: + - samples[i].relevant.size() > 0 + - samples[i].nonrelevant.size() > 0 + - if (is_matrix<T>::value == true) then + - All the elements of samples::nonrelevant and samples::relevant must + represent row or column vectors and they must be the same dimension. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + unsigned long max_index_plus_one ( + const ranking_pair<T>& item + ); + /*! + requires + - T must be a dlib::matrix capable of storing column vectors or T must be a + sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. + ensures + - returns std::max(max_index_plus_one(item.relevant), max_index_plus_one(item.nonrelevant)). 
+ Therefore, this function can be used to find the dimensionality of the + vectors stored in item. + !*/ + + template < + typename T + > + unsigned long max_index_plus_one ( + const std::vector<ranking_pair<T> >& samples + ); + /*! + requires + - T must be a dlib::matrix capable of storing column vectors or T must be a + sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. + ensures + - returns the maximum of max_index_plus_one(samples[i]) over all valid values + of i. Therefore, this function can be used to find the dimensionality of the + vectors stored in samples + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void count_ranking_inversions ( + const std::vector<T>& x, + const std::vector<T>& y, + std::vector<unsigned long>& x_count, + std::vector<unsigned long>& y_count + ); + /*! + requires + - T objects must be copyable + - T objects must be comparable via operator< + ensures + - This function counts how many times we see a y value greater than or equal to + an x value. This is done efficiently in O(n*log(n)) time via the use of + quick sort. + - #x_count.size() == x.size() + - #y_count.size() == y.size() + - for all valid i: + - #x_count[i] == how many times a value in y was >= x[i]. + - for all valid j: + - #y_count[j] == how many times a value in x was <= y[j]. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename ranking_function, + typename T + > + matrix<double,1,2> test_ranking_function ( + const ranking_function& funct, + const std::vector<ranking_pair<T> >& samples + ); + /*! + requires + - is_ranking_problem(samples) == true + - ranking_function == some kind of decision function object (e.g. decision_function) + ensures + - Tests the given ranking function on the supplied example ranking data and + returns the fraction of ranking pair orderings predicted correctly. 
This is + a number in the range [0,1] where 0 means everything was incorrectly + predicted while 1 means everything was correctly predicted. This function + also returns the mean average precision. + - In particular, this function returns a matrix M summarizing the results. + Specifically, it returns an M such that: + - M(0) == the fraction of times that the following is true: + - funct(samples[k].relevant[i]) > funct(samples[k].nonrelevant[j]) + (for all valid i,j,k) + - M(1) == the mean average precision of the rankings induced by funct. + (Mean average precision is a number in the range 0 to 1. Moreover, a + mean average precision of 1 means everything was correctly predicted + while smaller values indicate worse rankings. See the documentation + for average_precision() for details of its computation.) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename ranking_function, + typename T + > + matrix<double,1,2> test_ranking_function ( + const ranking_function& funct, + const ranking_pair<T>& sample + ); + /*! + requires + - is_ranking_problem(std::vector<ranking_pair<T> >(1, sample)) == true + - ranking_function == some kind of decision function object (e.g. decision_function) + ensures + - This is just a convenience routine for calling the above + test_ranking_function() routine. That is, it just copies sample into a + std::vector object and invokes the above test_ranking_function() routine. + This means that calling this function is equivalent to invoking: + return test_ranking_function(funct, std::vector<ranking_pair<T> >(1, sample)); + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename T + > + matrix<double,1,2> cross_validate_ranking_trainer ( + const trainer_type& trainer, + const std::vector<ranking_pair<T> >& samples, + const long folds + ); + /*! 
+ requires + - is_ranking_problem(samples) == true + - 1 < folds <= samples.size() + - trainer_type == some kind of ranking trainer object (e.g. svm_rank_trainer) + ensures + - Performs k-fold cross validation by using the given trainer to solve the + given ranking problem for the given number of folds. Each fold is tested + using the output of the trainer and the average ranking accuracy as well as + the mean average precision over the number of folds is returned. + - The accuracy is computed the same way test_ranking_function() computes its + accuracy. Therefore, it is a number in the range [0,1] that represents the + fraction of times a ranking pair's ordering was predicted correctly. Similarly, + the mean average precision is computed identically to test_ranking_function(). + In particular, this means that this function returns a matrix M such that: + - M(0) == the ranking accuracy + - M(1) == the mean average precision + - The number of folds used is given by the folds argument. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RANKING_ToOLS_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/rbf_network.h b/ml/dlib/dlib/svm/rbf_network.h new file mode 100644 index 000000000..23a2c7424 --- /dev/null +++ b/ml/dlib/dlib/svm/rbf_network.h @@ -0,0 +1,162 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RBf_NETWORK_ +#define DLIB_RBf_NETWORK_ + +#include "../matrix.h" +#include "rbf_network_abstract.h" +#include "kernel.h" +#include "linearly_independent_subset_finder.h" +#include "function.h" +#include "../algs.h" + +namespace dlib +{ + +// ------------------------------------------------------------------------------ + + template < + typename Kern + > + class rbf_network_trainer + { + /*! + This is an implementation of an RBF network trainer that follows + the directions right off Wikipedia basically. 
So nothing + particularly fancy. Although the way the centers are selected + is somewhat unique. + !*/ + + public: + typedef Kern kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + rbf_network_trainer ( + ) : + num_centers(10) + { + } + + void set_kernel ( + const kernel_type& k + ) + { + kernel = k; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel; + } + + void set_num_centers ( + const unsigned long num + ) + { + num_centers = num; + } + + unsigned long get_num_centers ( + ) const + { + return num_centers; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + return do_train(mat(x), mat(y)); + } + + void swap ( + rbf_network_trainer& item + ) + { + exchange(kernel, item.kernel); + exchange(num_centers, item.num_centers); + } + + private: + + // ------------------------------------------------------------------------------------ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + typedef typename decision_function<kernel_type>::scalar_vector_type scalar_vector_type; + + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y), + "\tdecision_function rbf_network_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + ); + + // use the linearly_independent_subset_finder object to select the centers. 
So here + // we show it all the data samples so it can find the best centers. + linearly_independent_subset_finder<kernel_type> lisf(kernel, num_centers); + fill_lisf(lisf, x); + + const long num_centers = lisf.size(); + + // fill the K matrix with the output of the kernel for all the center and sample point pairs + matrix<scalar_type,0,0,mem_manager_type> K(x.nr(), num_centers+1); + for (long r = 0; r < x.nr(); ++r) + { + for (long c = 0; c < num_centers; ++c) + { + K(r,c) = kernel(x(r), lisf[c]); + } + // This last column of the K matrix takes care of the bias term + K(r,num_centers) = 1; + } + + // compute the best weights by using the pseudo-inverse + scalar_vector_type weights(pinv(K)*y); + + // now put everything into a decision_function object and return it + return decision_function<kernel_type> (remove_row(weights,num_centers), + -weights(num_centers), + kernel, + lisf.get_dictionary()); + + } + + kernel_type kernel; + unsigned long num_centers; + + }; // end of class rbf_network_trainer + +// ---------------------------------------------------------------------------------------- + + template <typename sample_type> + void swap ( + rbf_network_trainer<sample_type>& a, + rbf_network_trainer<sample_type>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RBf_NETWORK_ + diff --git a/ml/dlib/dlib/svm/rbf_network_abstract.h b/ml/dlib/dlib/svm/rbf_network_abstract.h new file mode 100644 index 000000000..782a4bdbd --- /dev/null +++ b/ml/dlib/dlib/svm/rbf_network_abstract.h @@ -0,0 +1,132 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_RBf_NETWORK_ABSTRACT_ +#ifdef DLIB_RBf_NETWORK_ABSTRACT_ + +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class rbf_network_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + (since this is supposed to be a RBF network it is probably reasonable + to use some sort of radial basis kernel) + + INITIAL VALUE + - get_num_centers() == 10 + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for a radial basis function network. + + The implementation of this algorithm follows the normal RBF training + process. For more details see the code or the Wikipedia article + about RBF networks. + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + rbf_network_trainer ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_num_centers ( + const unsigned long num_centers + ); + /*! + ensures + - #get_num_centers() == num_centers + !*/ + + const unsigned long get_num_centers ( + ) const; + /*! + ensures + - returns the maximum number of centers (a.k.a. basis_vectors in the + trained decision_function) you will get when you train this object on data. 
+ !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + /*! + requires + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + - is_learning_problem(x,y) == true + ensures + - trains a RBF network given the training samples in x and + labels in y and returns the resulting decision_function + throws + - std::bad_alloc + !*/ + + void swap ( + rbf_network_trainer& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + rbf_network_trainer<K>& a, + rbf_network_trainer<K>& b + ) { a.swap(b); } + /*! + provides a global swap + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RBf_NETWORK_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/reduced.h b/ml/dlib/dlib/svm/reduced.h new file mode 100644 index 000000000..b4c5b63ca --- /dev/null +++ b/ml/dlib/dlib/svm/reduced.h @@ -0,0 +1,613 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_REDUCEd_TRAINERS_ +#define DLIB_REDUCEd_TRAINERS_ + +#include "reduced_abstract.h" +#include "../matrix.h" +#include "../algs.h" +#include "function.h" +#include "kernel.h" +#include "kcentroid.h" +#include "linearly_independent_subset_finder.h" +#include "../optimization.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class reduced_decision_function_trainer + { + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + reduced_decision_function_trainer ( + ) :num_bv(0) {} + + reduced_decision_function_trainer ( + const trainer_type& trainer_, + const unsigned long num_sb_ + ) : + trainer(trainer_), + num_bv(num_sb_) + { + // make sure requires clause is not broken + DLIB_ASSERT(num_bv > 0, + "\t reduced_decision_function_trainer()" + << "\n\t you have given invalid arguments to this function" + << "\n\t num_bv: " << num_bv + ); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(num_bv > 0, + "\t reduced_decision_function_trainer::train(x,y)" + << "\n\t You have tried to use an uninitialized version of this object" + << "\n\t num_bv: " << num_bv ); + return do_train(mat(x), mat(y)); + } + + private: + + // 
------------------------------------------------------------------------------------ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + // get the decision function object we are going to try and approximate + const decision_function<kernel_type>& dec_funct = trainer.train(x,y); + + // now find a linearly independent subset of the training points of num_bv points. + linearly_independent_subset_finder<kernel_type> lisf(dec_funct.kernel_function, num_bv); + fill_lisf(lisf, x); + + // The next few statements just find the best weights with which to approximate + // the dec_funct object with the smaller set of vectors in the lisf dictionary. This + // is really just a simple application of some linear algebra. For the details + // see page 554 of Learning with kernels by Scholkopf and Smola where they talk + // about "Optimal Expansion Coefficients." + + const kernel_type kern(dec_funct.kernel_function); + + matrix<scalar_type,0,1,mem_manager_type> alpha; + + alpha = lisf.get_inv_kernel_marix()*(kernel_matrix(kern,lisf,dec_funct.basis_vectors)*dec_funct.alpha); + + decision_function<kernel_type> new_df(alpha, + 0, + kern, + lisf.get_dictionary()); + + // now we have to figure out what the new bias should be. It might be a little + // different since we just messed with all the weights and vectors. 
+ double bias = 0; + for (long i = 0; i < x.nr(); ++i) + { + bias += new_df(x(i)) - dec_funct(x(i)); + } + + new_df.b = bias/x.nr(); + + return new_df; + } + + // ------------------------------------------------------------------------------------ + + trainer_type trainer; + unsigned long num_bv; + + + }; // end of class reduced_decision_function_trainer + + template <typename trainer_type> + const reduced_decision_function_trainer<trainer_type> reduced ( + const trainer_type& trainer, + const unsigned long num_bv + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(num_bv > 0, + "\tconst reduced_decision_function_trainer reduced()" + << "\n\t you have given invalid arguments to this function" + << "\n\t num_bv: " << num_bv + ); + + return reduced_decision_function_trainer<trainer_type>(trainer, num_bv); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace red_impl + { + + // ------------------------------------------------------------------------------------ + + template <typename kernel_type> + class objective + { + /* + This object represents the objective function we will try to + minimize in approximate_distance_function(). + + The objective is the distance, in kernel induced feature space, between + the original distance function and the approximated version. 
+ + */ + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + public: + objective( + const distance_function<kernel_type>& dist_funct_, + matrix<scalar_type,0,1,mem_manager_type>& b_, + matrix<sample_type,0,1,mem_manager_type>& out_vectors_ + ) : + dist_funct(dist_funct_), + b(b_), + out_vectors(out_vectors_) + { + } + + const matrix<scalar_type, 0, 1, mem_manager_type> state_to_vector ( + ) const + /*! + ensures + - returns a vector that contains all the information necessary to + reproduce the current state of the approximated distance function + !*/ + { + matrix<scalar_type, 0, 1, mem_manager_type> z(b.nr() + out_vectors.size()*out_vectors(0).nr()); + long i = 0; + for (long j = 0; j < b.nr(); ++j) + { + z(i) = b(j); + ++i; + } + + for (long j = 0; j < out_vectors.size(); ++j) + { + for (long k = 0; k < out_vectors(j).size(); ++k) + { + z(i) = out_vectors(j)(k); + ++i; + } + } + return z; + } + + + void vector_to_state ( + const matrix<scalar_type, 0, 1, mem_manager_type>& z + ) const + /*! + requires + - z came from the state_to_vector() function or has a compatible format + ensures + - loads the vector z into the state variables of the approximate + distance function (i.e. b and out_vectors) + !*/ + { + long i = 0; + for (long j = 0; j < b.nr(); ++j) + { + b(j) = z(i); + ++i; + } + + for (long j = 0; j < out_vectors.size(); ++j) + { + for (long k = 0; k < out_vectors(j).size(); ++k) + { + out_vectors(j)(k) = z(i); + ++i; + } + } + } + + double operator() ( + const matrix<scalar_type, 0, 1, mem_manager_type>& z + ) const + /*! + ensures + - loads the current approximate distance function with z + - returns the distance between the original distance function + and the approximate one. 
+ !*/ + { + vector_to_state(z); + const kernel_type k(dist_funct.get_kernel()); + + double temp = 0; + for (long i = 0; i < out_vectors.size(); ++i) + { + for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j) + { + temp -= b(i)*dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j)); + } + } + + temp *= 2; + + for (long i = 0; i < out_vectors.size(); ++i) + { + for (long j = 0; j < out_vectors.size(); ++j) + { + temp += b(i)*b(j)*k(out_vectors(i), out_vectors(j)); + } + } + + return temp + dist_funct.get_squared_norm(); + } + + private: + + const distance_function<kernel_type>& dist_funct; + matrix<scalar_type,0,1,mem_manager_type>& b; + matrix<sample_type,0,1,mem_manager_type>& out_vectors; + + }; + + // ------------------------------------------------------------------------------------ + + template <typename kernel_type> + class objective_derivative + { + /*! + This object represents the derivative of the objective object + !*/ + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + public: + + + objective_derivative( + const distance_function<kernel_type>& dist_funct_, + matrix<scalar_type,0,1,mem_manager_type>& b_, + matrix<sample_type,0,1,mem_manager_type>& out_vectors_ + ) : + dist_funct(dist_funct_), + b(b_), + out_vectors(out_vectors_) + { + } + + void vector_to_state ( + const matrix<scalar_type, 0, 1, mem_manager_type>& z + ) const + /*! + requires + - z came from the state_to_vector() function or has a compatible format + ensures + - loads the vector z into the state variables of the approximate + distance function (i.e. 
b and out_vectors) + !*/ + { + long i = 0; + for (long j = 0; j < b.nr(); ++j) + { + b(j) = z(i); + ++i; + } + + for (long j = 0; j < out_vectors.size(); ++j) + { + for (long k = 0; k < out_vectors(j).size(); ++k) + { + out_vectors(j)(k) = z(i); + ++i; + } + } + } + + const matrix<scalar_type,0,1,mem_manager_type>& operator() ( + const matrix<scalar_type, 0, 1, mem_manager_type>& z + ) const + /*! + ensures + - loads the current approximate distance function with z + - returns the derivative of the distance between the original + distance function and the approximate one. + !*/ + { + vector_to_state(z); + res.set_size(z.nr()); + set_all_elements(res,0); + const kernel_type k(dist_funct.get_kernel()); + const kernel_derivative<kernel_type> K_der(k); + + // first compute the gradient for the beta weights + for (long i = 0; i < out_vectors.size(); ++i) + { + for (long j = 0; j < out_vectors.size(); ++j) + { + res(i) += b(j)*k(out_vectors(i), out_vectors(j)); + } + } + for (long i = 0; i < out_vectors.size(); ++i) + { + for (long j = 0; j < dist_funct.get_basis_vectors().size(); ++j) + { + res(i) -= dist_funct.get_alpha()(j)*k(out_vectors(i), dist_funct.get_basis_vectors()(j)); + } + } + + + // now compute the gradient of the actual vectors that go into the kernel functions + long pos = out_vectors.size(); + const long num = out_vectors(0).nr(); + temp.set_size(num,1); + for (long i = 0; i < out_vectors.size(); ++i) + { + set_all_elements(temp,0); + for (long j = 0; j < out_vectors.size(); ++j) + { + temp += b(j)*K_der(out_vectors(j), out_vectors(i)); + } + for (long j = 0; j < dist_funct.get_basis_vectors().nr(); ++j) + { + temp -= dist_funct.get_alpha()(j)*K_der(dist_funct.get_basis_vectors()(j), out_vectors(i) ); + } + + // store the gradient for out_vectors(i) into result in the proper spot + set_subm(res,pos,0,num,1) = b(i)*temp; + pos += num; + } + + + res *= 2; + return res; + } + + private: + + mutable matrix<scalar_type, 0, 1, mem_manager_type> res; + mutable 
sample_type temp; + + const distance_function<kernel_type>& dist_funct; + matrix<scalar_type,0,1,mem_manager_type>& b; + matrix<sample_type,0,1,mem_manager_type>& out_vectors; + + }; + + // ------------------------------------------------------------------------------------ + + } + + template < + typename K, + typename stop_strategy_type, + typename T + > + distance_function<K> approximate_distance_function ( + stop_strategy_type stop_strategy, + const distance_function<K>& target, + const T& starting_basis + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(target.get_basis_vectors().size() > 0 && + starting_basis.size() > 0, + "\t distance_function approximate_distance_function()" + << "\n\t Invalid inputs were given to this function." + << "\n\t target.get_basis_vectors().size(): " << target.get_basis_vectors().size() + << "\n\t starting_basis.size(): " << starting_basis.size() + ); + + using namespace red_impl; + // The next few statements just find the best weights with which to approximate + // the target object with the set of basis vectors in starting_basis. This + // is really just a simple application of some linear algebra. For the details + // see page 554 of Learning with kernels by Scholkopf and Smola where they talk + // about "Optimal Expansion Coefficients." + + const K kern(target.get_kernel()); + typedef typename K::scalar_type scalar_type; + typedef typename K::sample_type sample_type; + typedef typename K::mem_manager_type mem_manager_type; + + matrix<scalar_type,0,1,mem_manager_type> beta; + + // Now we compute the fist approximate distance function. + beta = pinv(kernel_matrix(kern,starting_basis)) * + (kernel_matrix(kern,starting_basis,target.get_basis_vectors())*target.get_alpha()); + matrix<sample_type,0,1,mem_manager_type> out_vectors(mat(starting_basis)); + + + // Now setup to do a global optimization of all the parameters in the approximate + // distance function. 
+ const objective<K> obj(target, beta, out_vectors); + const objective_derivative<K> obj_der(target, beta, out_vectors); + matrix<scalar_type,0,1,mem_manager_type> opt_starting_point(obj.state_to_vector()); + + + // perform a full optimization of all the parameters (i.e. both beta and the basis vectors together) + find_min(lbfgs_search_strategy(20), + stop_strategy, + obj, obj_der, opt_starting_point, 0); + + // now make sure that the final optimized value is loaded into the beta and + // out_vectors matrices + obj.vector_to_state(opt_starting_point); + + // Do a final reoptimization of beta just to make sure it is optimal given the new + // set of basis vectors. + beta = pinv(kernel_matrix(kern,out_vectors))*(kernel_matrix(kern,out_vectors,target.get_basis_vectors())*target.get_alpha()); + + // It is possible that some of the beta weights will be very close to zero. Lets remove + // the basis vectors with these essentially zero weights. + const scalar_type eps = max(abs(beta))*std::numeric_limits<scalar_type>::epsilon(); + for (long i = 0; i < beta.size(); ++i) + { + // if beta(i) is zero (but leave at least one beta no matter what) + if (std::abs(beta(i)) < eps && beta.size() > 1) + { + beta = remove_row(beta, i); + out_vectors = remove_row(out_vectors, i); + --i; + } + } + + return distance_function<K>(beta, kern, out_vectors); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class reduced_decision_function_trainer2 + { + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename 
trainer_type::trained_function_type trained_function_type; + + reduced_decision_function_trainer2 () : num_bv(0) {} + reduced_decision_function_trainer2 ( + const trainer_type& trainer_, + const long num_sb_, + const double eps_ = 1e-3 + ) : + trainer(trainer_), + num_bv(num_sb_), + eps(eps_) + { + COMPILE_TIME_ASSERT(is_matrix<sample_type>::value); + + // make sure requires clause is not broken + DLIB_ASSERT(num_bv > 0 && eps > 0, + "\t reduced_decision_function_trainer2()" + << "\n\t you have given invalid arguments to this function" + << "\n\t num_bv: " << num_bv + << "\n\t eps: " << eps + ); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(num_bv > 0, + "\t reduced_decision_function_trainer2::train(x,y)" + << "\n\t You have tried to use an uninitialized version of this object" + << "\n\t num_bv: " << num_bv ); + return do_train(mat(x), mat(y)); + } + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + // get the decision function object we are going to try and approximate + const decision_function<kernel_type>& dec_funct = trainer.train(x,y); + const kernel_type kern(dec_funct.kernel_function); + + // now find a linearly independent subset of the training points of num_bv points. 
+ linearly_independent_subset_finder<kernel_type> lisf(kern, num_bv); + fill_lisf(lisf,x); + + distance_function<kernel_type> approx, target; + target = dec_funct; + approx = approximate_distance_function(objective_delta_stop_strategy(eps), target, lisf); + + decision_function<kernel_type> new_df(approx.get_alpha(), + 0, + kern, + approx.get_basis_vectors()); + + // now we have to figure out what the new bias should be. It might be a little + // different since we just messed with all the weights and vectors. + double bias = 0; + for (long i = 0; i < x.nr(); ++i) + { + bias += new_df(x(i)) - dec_funct(x(i)); + } + + new_df.b = bias/x.nr(); + + return new_df; + + } + + // ------------------------------------------------------------------------------------ + + trainer_type trainer; + long num_bv; + double eps; + + + }; // end of class reduced_decision_function_trainer2 + + template <typename trainer_type> + const reduced_decision_function_trainer2<trainer_type> reduced2 ( + const trainer_type& trainer, + const long num_bv, + double eps = 1e-3 + ) + { + COMPILE_TIME_ASSERT(is_matrix<typename trainer_type::sample_type>::value); + + // make sure requires clause is not broken + DLIB_ASSERT(num_bv > 0 && eps > 0, + "\tconst reduced_decision_function_trainer2 reduced2()" + << "\n\t you have given invalid arguments to this function" + << "\n\t num_bv: " << num_bv + << "\n\t eps: " << eps + ); + + return reduced_decision_function_trainer2<trainer_type>(trainer, num_bv, eps); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_REDUCEd_TRAINERS_ + diff --git a/ml/dlib/dlib/svm/reduced_abstract.h b/ml/dlib/dlib/svm/reduced_abstract.h new file mode 100644 index 000000000..8b186c033 --- /dev/null +++ 
b/ml/dlib/dlib/svm/reduced_abstract.h @@ -0,0 +1,267 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_REDUCED_TRAINERs_ABSTRACT_ +#ifdef DLIB_REDUCED_TRAINERs_ABSTRACT_ + +#include "../matrix.h" +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "../optimization.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class reduced_decision_function_trainer + { + /*! + REQUIREMENTS ON trainer_type + - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) + + WHAT THIS OBJECT REPRESENTS + This object represents an implementation of a reduced set algorithm. + This object acts as a post processor for anything that creates + decision_function objects. It wraps another trainer object and + performs this reduced set post processing with the goal of + representing the original decision function in a form that + involves fewer basis vectors. + !*/ + + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + reduced_decision_function_trainer ( + ); + /*! + ensures + - This object is in an uninitialized state. You must + construct a real one with the other constructor and assign it + to this instance before you use this object. + !*/ + + reduced_decision_function_trainer ( + const trainer_type& trainer, + const unsigned long num_bv + ); + /*! 
+ requires + - num_bv > 0 + ensures + - returns a trainer object that applies post processing to the decision_function + objects created by the given trainer object with the goal of creating + decision_function objects with fewer basis vectors. + - The reduced decision functions that are output will have at most + num_bv basis vectors. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + ensures + - trains a decision_function using the trainer that was supplied to + this object's constructor and then finds a reduced representation + for it and returns the reduced version. + throws + - std::bad_alloc + - any exceptions thrown by the trainer_type object + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const reduced_decision_function_trainer<trainer_type> reduced ( + const trainer_type& trainer, + const unsigned long num_bv + ) { return reduced_decision_function_trainer<trainer_type>(trainer, num_bv); } + /*! + requires + - num_bv > 0 + - trainer_type == some kind of batch trainer object that creates decision_function + objects (e.g. svm_nu_trainer) + ensures + - returns a reduced_decision_function_trainer object that has been + instantiated with the given arguments. + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename K, + typename stop_strategy_type, + typename T + > + distance_function<K> approximate_distance_function ( + stop_strategy_type stop_strategy, + const distance_function<K>& target, + const T& starting_basis + ); + /*! 
+ requires + - stop_strategy == an object that defines a stop strategy such as one of + the objects from dlib/optimization/optimization_stop_strategies_abstract.h + - requirements on starting_basis + - T must be a dlib::matrix type or something convertible to a matrix via mat() + (e.g. a std::vector). Additionally, starting_basis must contain K::sample_type + objects which can be supplied to the kernel function used by target. + - is_vector(starting_basis) == true + - starting_basis.size() > 0 + - target.get_basis_vectors().size() > 0 + - kernel_derivative<K> is defined + (i.e. The analytic derivative for the given kernel must be defined) + - K::sample_type must be a dlib::matrix object and the basis_vectors inside target + and starting_basis must be column vectors. + ensures + - This routine attempts to find a distance_function object which is close + to the given target. That is, it searches for an X such that target(X) is + minimized. The optimization begins with an X in the span of the elements + of starting_basis and searches for an X which locally minimizes target(X). + Since this problem can have many local minima, the quality of the starting + basis can significantly influence the results. + - The optimization is over all variables in a distance_function, however, + the size of the basis set is constrained to no more than starting_basis.size(). + That is, in the returned distance_function DF, we will have: + - DF.get_basis_vectors().size() <= starting_basis.size() + - The optimization is carried out until the stop_strategy indicates it + should stop. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class reduced_decision_function_trainer2 + { + /*! + REQUIREMENTS ON trainer_type + - trainer_type == some kind of batch trainer object (e.g. 
svm_nu_trainer) + - trainer_type::sample_type must be a dlib::matrix object + - kernel_derivative<trainer_type::kernel_type> must be defined + + WHAT THIS OBJECT REPRESENTS + This object represents an implementation of a reduced set algorithm. + This object acts as a post processor for anything that creates + decision_function objects. It wraps another trainer object and + performs this reduced set post processing with the goal of + representing the original decision function in a form that + involves fewer basis vectors. + + This object's implementation is the same as that in the above + reduced_decision_function_trainer object except it also performs + a global gradient based optimization at the end to further + improve the approximation to the original decision function + object. + !*/ + + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + reduced_decision_function_trainer2 ( + ); + /*! + ensures + - This object is in an uninitialized state. You must + construct a real one with the other constructor and assign it + to this instance before you use this object. + !*/ + + reduced_decision_function_trainer2 ( + const trainer_type& trainer, + const unsigned long num_bv, + double eps = 1e-3 + ); + /*! + requires + - num_bv > 0 + - eps > 0 + ensures + - returns a trainer object that applies post processing to the decision_function + objects created by the given trainer object with the goal of creating + decision_function objects with fewer basis vectors. + - The reduced decision functions that are output will have at most + num_bv basis vectors. + - the gradient based optimization will continue until the change in the + objective function is less than eps. 
So smaller values of eps will + give better results but take longer to compute. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - x must be a list of objects which are each some kind of dlib::matrix + which represents column or row vectors. + ensures + - trains a decision_function using the trainer that was supplied to + this object's constructor and then finds a reduced representation + for it and returns the reduced version. + throws + - std::bad_alloc + - any exceptions thrown by the trainer_type object + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const reduced_decision_function_trainer2<trainer_type> reduced2 ( + const trainer_type& trainer, + const unsigned long num_bv, + double eps = 1e-3 + ) { return reduced_decision_function_trainer2<trainer_type>(trainer, num_bv, eps); } + /*! + requires + - num_bv > 0 + - trainer_type == some kind of batch trainer object that creates decision_function + objects (e.g. svm_nu_trainer) + - kernel_derivative<trainer_type::kernel_type> is defined + - eps > 0 + ensures + - returns a reduced_decision_function_trainer2 object that has been + instantiated with the given arguments. + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_REDUCED_TRAINERs_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/rls.h b/ml/dlib/dlib/svm/rls.h new file mode 100644 index 000000000..edee6b062 --- /dev/null +++ b/ml/dlib/dlib/svm/rls.h @@ -0,0 +1,232 @@ +// Copyright (C) 2012 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RLs_Hh_ +#define DLIB_RLs_Hh_ + +#include "rls_abstract.h" +#include "../matrix.h" +#include "function.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class rls + { + + public: + + + explicit rls( + double forget_factor_, + double C_ = 1000, + bool apply_forget_factor_to_C_ = false + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < forget_factor_ && forget_factor_ <= 1 && + 0 < C_, + "\t rls::rls()" + << "\n\t invalid arguments were given to this function" + << "\n\t forget_factor_: " << forget_factor_ + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + + C = C_; + forget_factor = forget_factor_; + apply_forget_factor_to_C = apply_forget_factor_to_C_; + } + + rls( + ) + { + C = 1000; + forget_factor = 1; + apply_forget_factor_to_C = false; + } + + double get_c( + ) const + { + return C; + } + + double get_forget_factor( + ) const + { + return forget_factor; + } + + bool should_apply_forget_factor_to_C ( + ) const + { + return apply_forget_factor_to_C; + } + + template <typename EXP> + void train ( + const matrix_exp<EXP>& x, + double y + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(x) && + (get_w().size() == 0 || get_w().size() == x.size()), + "\t void rls::train()" + << "\n\t invalid arguments were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t x.size(): " << x.size() + << "\n\t get_w().size(): " << get_w().size() + << "\n\t this: " << this + ); + + if (R.size() == 0) + { + R = identity_matrix<double>(x.size())*C; + w.set_size(x.size()); + w = 0; + } + + // multiply by forget factor and incorporate x*trans(x) into R. 
+ const double l = 1.0/forget_factor; + const double temp = 1 + l*trans(x)*R*x; + tmp = R*x; + R = l*R - l*l*(tmp*trans(tmp))/temp; + + // Since we multiplied by the forget factor, we need to add (1-forget_factor) of the + // identity matrix back in to keep the regularization alive. + if (forget_factor != 1 && !apply_forget_factor_to_C) + add_eye_to_inv(R, (1-forget_factor)/C); + + // R should always be symmetric. This line improves numeric stability of this algorithm. + if (cnt%10 == 0) + R = 0.5*(R + trans(R)); + ++cnt; + + w = w + R*x*(y - trans(x)*w); + + } + + + + const matrix<double,0,1>& get_w( + ) const + { + return w; + } + + template <typename EXP> + double operator() ( + const matrix_exp<EXP>& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_col_vector(x) && get_w().size() == x.size(), + "\t double rls::operator()()" + << "\n\t invalid arguments were given to this function" + << "\n\t is_col_vector(x): " << is_col_vector(x) + << "\n\t x.size(): " << x.size() + << "\n\t get_w().size(): " << get_w().size() + << "\n\t this: " << this + ); + + return dot(x,w); + } + + decision_function<linear_kernel<matrix<double,0,1> > > get_decision_function ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(get_w().size() != 0, + "\t decision_function rls::get_decision_function()" + << "\n\t invalid arguments were given to this function" + << "\n\t get_w().size(): " << get_w().size() + << "\n\t this: " << this + ); + + decision_function<linear_kernel<matrix<double,0,1> > > df; + df.alpha.set_size(1); + df.basis_vectors.set_size(1); + df.b = 0; + df.alpha = 1; + df.basis_vectors(0) = w; + + return df; + } + + friend inline void serialize(const rls& item, std::ostream& out) + { + int version = 2; + serialize(version, out); + serialize(item.w, out); + serialize(item.R, out); + serialize(item.C, out); + serialize(item.forget_factor, out); + serialize(item.cnt, out); + serialize(item.apply_forget_factor_to_C, out); + } + + 
friend inline void deserialize(rls& item, std::istream& in) + { + int version = 0; + deserialize(version, in); + if (!(1 <= version && version <= 2)) + throw dlib::serialization_error("Unknown version number found while deserializing rls object."); + + if (version >= 1) + { + deserialize(item.w, in); + deserialize(item.R, in); + deserialize(item.C, in); + deserialize(item.forget_factor, in); + } + item.cnt = 0; + item.apply_forget_factor_to_C = false; + if (version >= 2) + { + deserialize(item.cnt, in); + deserialize(item.apply_forget_factor_to_C, in); + } + } + + private: + + void add_eye_to_inv( + matrix<double>& m, + double C + ) + /*! + ensures + - Let m == inv(M) + - this function returns inv(M + C*identity_matrix<double>(m.nr())) + !*/ + { + for (long r = 0; r < m.nr(); ++r) + { + m = m - colm(m,r)*trans(colm(m,r))/(1/C + m(r,r)); + } + } + + + matrix<double,0,1> w; + matrix<double> R; + double C; + double forget_factor; + int cnt = 0; + bool apply_forget_factor_to_C; + + + // This object is here only to avoid reallocation during training. It don't + // logically contribute to the state of this object. + matrix<double,0,1> tmp; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RLs_Hh_ + diff --git a/ml/dlib/dlib/svm/rls_abstract.h b/ml/dlib/dlib/svm/rls_abstract.h new file mode 100644 index 000000000..c593e4330 --- /dev/null +++ b/ml/dlib/dlib/svm/rls_abstract.h @@ -0,0 +1,175 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_RLs_ABSTRACT_Hh_ +#ifdef DLIB_RLs_ABSTRACT_Hh_ + +#include "../matrix/matrix_abstract.h" +#include "function_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class rls + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This is an implementation of the linear version of the recursive least + squares algorithm. It accepts training points incrementally and, at + each step, maintains the solution to the following optimization problem: + find w minimizing: 0.5*dot(w,w) + C*sum_i(y_i - trans(x_i)*w)^2 + Where (x_i,y_i) are training pairs. x_i is some vector and y_i is a target + scalar value. + + This object can also be configured to use exponential forgetting. This is + where each training example is weighted by pow(forget_factor, i), where i + indicates the sample's age. So older samples are weighted less in the + least squares solution and therefore become forgotten after some time. + Therefore, with forgetting, this object solves the following optimization + problem at each step: + find w minimizing: 0.5*dot(w,w) + C*sum_i pow(forget_factor, i)*(y_i - trans(x_i)*w)^2 + Where i starts at 0 and i==0 corresponds to the most recent training point. + !*/ + + public: + + + explicit rls( + double forget_factor, + double C = 1000, + bool apply_forget_factor_to_C = false + ); + /*! + requires + - 0 < forget_factor <= 1 + - 0 < C + ensures + - #get_w().size() == 0 + - #get_c() == C + - #get_forget_factor() == forget_factor + - #should_apply_forget_factor_to_C() == apply_forget_factor_to_C + !*/ + + rls( + ); + /*! + ensures + - #get_w().size() == 0 + - #get_c() == 1000 + - #get_forget_factor() == 1 + - #should_apply_forget_factor_to_C() == false + !*/ + + double get_c( + ) const; + /*! + ensures + - returns the regularization parameter. It is the parameter + that determines the trade-off between trying to fit the training + data or allowing more errors but hopefully improving the generalization + of the resulting regression. Larger values encourage exact fitting while + smaller values of C may encourage better generalization. + !*/ + + double get_forget_factor( + ) const; + /*! + ensures + - returns the exponential forgetting factor. 
A value of 1 disables forgetting + and results in normal least squares regression. On the other hand, a smaller + value causes the regression to forget about old training examples and prefer + instead to fit more recent examples. The closer the forget factor is to + zero the faster old examples are forgotten. + !*/ + + bool should_apply_forget_factor_to_C ( + ) const; + /*! + ensures + - If this function returns false then it means we are optimizing the + objective function discussed in the WHAT THIS OBJECT REPRESENTS section + above. However, if it returns true then we will allow the forget factor + (get_forget_factor()) to be applied to the C value which causes the + algorithm to slowly increase C and convert into a textbook version of RLS + without regularization. The main reason you might want to do this is + because it can make the algorithm run significantly faster. + !*/ + + template <typename EXP> + void train ( + const matrix_exp<EXP>& x, + double y + ) + /*! + requires + - is_col_vector(x) == true + - if (get_w().size() != 0) then + - x.size() == get_w().size() + (i.e. all training examples must have the same + dimensionality) + ensures + - #get_w().size() == x.size() + - updates #get_w() such that it contains the solution to the least + squares problem of regressing the given x onto the given y as well + as all the previous training examples supplied to train(). + !*/ + + const matrix<double,0,1>& get_w( + ) const; + /*! + ensures + - returns the regression weights. These are the values learned by the + least squares procedure. If train() has not been called then this + function returns an empty vector. + !*/ + + template <typename EXP> + double operator() ( + const matrix_exp<EXP>& x + ) const; + /*! + requires + - is_col_vector(x) == true + - get_w().size() == x.size() + ensures + - returns dot(x, get_w()) + !*/ + + decision_function<linear_kernel<matrix<double,0,1> > > get_decision_function ( + ) const; + /*! 
+ requires + - get_w().size() != 0 + ensures + - returns a decision function DF such that: + - DF(x) == dot(x, get_w()) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + void serialize ( + const rls& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + void deserialize ( + rls& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RLs_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/roc_trainer.h b/ml/dlib/dlib/svm/roc_trainer.h new file mode 100644 index 000000000..fa2c0ef9b --- /dev/null +++ b/ml/dlib/dlib/svm/roc_trainer.h @@ -0,0 +1,149 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_ROC_TRAINEr_H_ +#define DLIB_ROC_TRAINEr_H_ + +#include "roc_trainer_abstract.h" +#include "../algs.h" +#include <limits> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class roc_trainer_type + { + public: + typedef typename trainer_type::kernel_type kernel_type; + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef typename trainer_type::trained_function_type trained_function_type; + + roc_trainer_type ( + ) : desired_accuracy(0), class_selection(0){} + + roc_trainer_type ( + const trainer_type& trainer_, + const scalar_type& desired_accuracy_, + const scalar_type& class_selection_ + ) : trainer(trainer_), desired_accuracy(desired_accuracy_), class_selection(class_selection_) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 <= desired_accuracy && desired_accuracy <= 1 && + (class_selection == -1 
|| class_selection == +1), + "\t roc_trainer_type::roc_trainer_type()" + << "\n\t invalid inputs were given to this function" + << "\n\t desired_accuracy: " << desired_accuracy + << "\n\t class_selection: " << class_selection + ); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const trained_function_type train ( + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels + ) const + /*! + requires + - is_binary_classification_problem(samples, labels) == true + !*/ + { + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(samples, labels), + "\t roc_trainer_type::train()" + << "\n\t invalid inputs were given to this function" + ); + + + return do_train(mat(samples), mat(labels)); + } + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const trained_function_type do_train ( + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels + ) const + { + trained_function_type df = trainer.train(samples, labels); + + // clear out the old bias + df.b = 0; + + // obtain all the scores from the df using all the class_selection labeled samples + std::vector<double> scores; + for (long i = 0; i < samples.size(); ++i) + { + if (labels(i) == class_selection) + scores.push_back(df(samples(i))); + } + + if (class_selection == +1) + std::sort(scores.rbegin(), scores.rend()); + else + std::sort(scores.begin(), scores.end()); + + // now pick out the index that gives us the desired accuracy with regards to selected class + unsigned long idx = static_cast<unsigned long>(desired_accuracy*scores.size() + 0.5); + if (idx >= scores.size()) + idx = scores.size()-1; + + df.b = scores[idx]; + + // In this case add a very small extra amount to the bias so that all the samples + // with the class_selection label are classified correctly. 
+ if (desired_accuracy == 1) + { + if (class_selection == +1) + df.b -= std::numeric_limits<scalar_type>::epsilon()*df.b; + else + df.b += std::numeric_limits<scalar_type>::epsilon()*df.b; + } + + return df; + } + + trainer_type trainer; + scalar_type desired_accuracy; + scalar_type class_selection; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const roc_trainer_type<trainer_type> roc_c1_trainer ( + const trainer_type& trainer, + const typename trainer_type::scalar_type& desired_accuracy + ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, +1); } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + const roc_trainer_type<trainer_type> roc_c2_trainer ( + const trainer_type& trainer, + const typename trainer_type::scalar_type& desired_accuracy + ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, -1); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ROC_TRAINEr_H_ + + diff --git a/ml/dlib/dlib/svm/roc_trainer_abstract.h b/ml/dlib/dlib/svm/roc_trainer_abstract.h new file mode 100644 index 000000000..74e6f9b65 --- /dev/null +++ b/ml/dlib/dlib/svm/roc_trainer_abstract.h @@ -0,0 +1,135 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_ROC_TRAINEr_ABSTRACT_ +#ifdef DLIB_ROC_TRAINEr_ABSTRACT_ + +#include "../algs.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + class roc_trainer_type + { + /*! + REQUIREMENTS ON trainer_type + - trainer_type == some kind of batch trainer object (e.g. 
svm_nu_trainer)
+
+            WHAT THIS OBJECT REPRESENTS
+                This object is a simple trainer post processor that allows you to
+                easily adjust the bias term in a trained decision_function object.
+                That is, this object lets you pick a point on the ROC curve and
+                it will adjust the bias term appropriately.
+
+                So for example, suppose you wanted to set the bias term so that
+                the accuracy of your decision function on +1 labeled samples was 99%.
+                To do this you would use an instance of this object declared as follows:
+                    roc_trainer_type<trainer_type>(your_trainer, 0.99, +1);
+        !*/
+
+    public:
+        typedef typename trainer_type::kernel_type kernel_type;
+        typedef typename trainer_type::scalar_type scalar_type;
+        typedef typename trainer_type::sample_type sample_type;
+        typedef typename trainer_type::mem_manager_type mem_manager_type;
+        typedef typename trainer_type::trained_function_type trained_function_type;
+
+        roc_trainer_type (
+        );
+        /*!
+            ensures
+                - This object is in an uninitialized state.  You must
+                  construct a real one with the other constructor and assign it
+                  to this instance before you use this object.
+        !*/
+
+        roc_trainer_type (
+            const trainer_type& trainer_,
+            const scalar_type& desired_accuracy_,
+            const scalar_type& class_selection_
+        );
+        /*!
+            requires
+                - 0 <= desired_accuracy_ <= 1
+                - class_selection_ == +1 or -1
+            ensures
+                - when training is performed using this object it will automatically
+                  adjust the bias term in the returned decision function so that it
+                  achieves the desired accuracy on the selected class type.
+        !*/
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const trained_function_type train (
+            const in_sample_vector_type& samples,
+            const in_scalar_vector_type& labels
+        ) const
+        /*!
+            requires
+                - is_binary_classification_problem(samples, labels) == true
+                - samples == a matrix or something convertible to a matrix via mat().
+                  Also, samples should contain sample_type objects.
+                - labels == a matrix or something convertible to a matrix via mat().
+                  Also, labels should contain scalar_type objects.
+            ensures
+                - performs training using the trainer object given to this object's
+                  constructor, then modifies the bias term in the returned decision function
+                  as discussed above, and finally returns the decision function.
+        !*/
+
+    };
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    const roc_trainer_type<trainer_type> roc_c1_trainer (
+        const trainer_type& trainer,
+        const typename trainer_type::scalar_type& desired_accuracy
+    ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, +1); }
+    /*!
+        requires
+            - 0 <= desired_accuracy <= 1
+            - trainer_type == some kind of batch trainer object that creates decision_function
+              objects (e.g. svm_nu_trainer)
+        ensures
+            - returns a roc_trainer_type object that has been instantiated with the given
+              arguments.  The returned roc trainer will select the decision function
+              bias that gives the desired accuracy with respect to the +1 class.
+    !*/
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type
+        >
+    const roc_trainer_type<trainer_type> roc_c2_trainer (
+        const trainer_type& trainer,
+        const typename trainer_type::scalar_type& desired_accuracy
+    ) { return roc_trainer_type<trainer_type>(trainer, desired_accuracy, -1); }
+    /*!
+        requires
+            - 0 <= desired_accuracy <= 1
+            - trainer_type == some kind of batch trainer object that creates decision_function
+              objects (e.g. svm_nu_trainer)
+        ensures
+            - returns a roc_trainer_type object that has been instantiated with the given
+              arguments.  The returned roc trainer will select the decision function
+              bias that gives the desired accuracy with respect to the -1 class.
+ !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_ROC_TRAINEr_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/rr_trainer.h b/ml/dlib/dlib/svm/rr_trainer.h new file mode 100644 index 000000000..09177217e --- /dev/null +++ b/ml/dlib/dlib/svm/rr_trainer.h @@ -0,0 +1,456 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RR_TRAInER_Hh_ +#define DLIB_RR_TRAInER_Hh_ + +#include "../algs.h" +#include "function.h" +#include "kernel.h" +#include "empirical_kernel_map.h" +#include "linearly_independent_subset_finder.h" +#include "../statistics.h" +#include "rr_trainer_abstract.h" +#include <vector> +#include <iostream> + +namespace dlib +{ + template < + typename K + > + class rr_trainer + { + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + // You are getting a compiler error on this line because you supplied a non-linear or + // sparse kernel to the rr_trainer object. You have to use dlib::linear_kernel with this trainer. 
+ COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value)); + + rr_trainer ( + ) : + verbose(false), + use_regression_loss(true), + lambda(0) + { + // default lambda search list + lams = matrix_cast<scalar_type>(logspace(-9, 2, 50)); + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void use_regression_loss_for_loo_cv ( + ) + { + use_regression_loss = true; + } + + void use_classification_loss_for_loo_cv ( + ) + { + use_regression_loss = false; + } + + bool will_use_regression_loss_for_loo_cv ( + ) const + { + return use_regression_loss; + } + + const kernel_type get_kernel ( + ) const + { + return kernel_type(); + } + + void set_lambda ( + scalar_type lambda_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(lambda_ >= 0, + "\t void rr_trainer::set_lambda()" + << "\n\t lambda must be greater than or equal to 0" + << "\n\t lambda: " << lambda + << "\n\t this: " << this + ); + + lambda = lambda_; + } + + const scalar_type get_lambda ( + ) const + { + return lambda; + } + + template <typename EXP> + void set_search_lambdas ( + const matrix_exp<EXP>& lambdas + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(lambdas) && lambdas.size() > 0 && min(lambdas) > 0, + "\t void rr_trainer::set_search_lambdas()" + << "\n\t lambdas must be a non-empty vector of values" + << "\n\t is_vector(lambdas): " << is_vector(lambdas) + << "\n\t lambdas.size(): " << lambdas.size() + << "\n\t min(lambdas): " << min(lambdas) + << "\n\t this: " << this + ); + + + lams = matrix_cast<scalar_type>(lambdas); + } + + const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( + ) const + { + return lams; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + std::vector<scalar_type> temp; + scalar_type 
temp2; + return do_train(mat(x), mat(y), false, temp, temp2); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + std::vector<scalar_type>& loo_values + ) const + { + scalar_type temp; + return do_train(mat(x), mat(y), true, loo_values, temp); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + std::vector<scalar_type>& loo_values, + scalar_type& lambda_used + ) const + { + return do_train(mat(x), mat(y), true, loo_values, lambda_used); + } + + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + const bool output_loo_values, + std::vector<scalar_type>& loo_values, + scalar_type& the_lambda + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y), + "\t decision_function rr_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t is_vector(x): " << is_vector(x) + << "\n\t is_vector(y): " << is_vector(y) + << "\n\t x.size(): " << x.size() + << "\n\t y.size(): " << y.size() + ); + +#ifdef ENABLE_ASSERTS + if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y), + "\t decision_function rr_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + ); + } +#endif + + typedef matrix<scalar_type,0,1,mem_manager_type> column_matrix_type; + typedef matrix<scalar_type,0,0,mem_manager_type> general_matrix_type; + + const long dims = x(0).size(); + + /* + Notes on the solution of ridge regression + + Let A = 
an x.size() by dims matrix which contains all the data samples. + + Let I = an identity matrix + + Let C = trans(A)*A + Let L = trans(A)*y + + Then the optimal w is given by: + w = inv(C + lambda*I) * L + + + There is a trick to compute leave one out cross validation results for many different + lambda values quickly. The following paper has a detailed discussion of various + approaches: + + Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert. + + In the implementation of the rr_trainer I'm only using two simple equations + from the above paper. + + + First note that inv(C + lambda*I) can be computed for many different lambda + values in an efficient way by using an eigen decomposition of C. So we use + the fact that: + inv(C + lambda*I) == V*inv(D + lambda*I)*trans(V) + where V*D*trans(V) == C + + Also, via some simple linear algebra the above paper works out that the leave one out + value for a sample x(i) is equal to the following: + Let G = inv(C + lambda*I) + let val = trans(x(i))*G*x(i); + + leave one out value for sample x(i): + LOOV = (trans(w)*x(i) - y(i)*val) / (1 - val) + + leave one out error for sample x(i): + LOOE = loss(y(i), LOOV) + + + Finally, note that we will pretend there was a 1 appended to the end of each + vector in x. We won't actually do that though because we don't want to + have to make a copy of all the samples. So throughout the following code + I have explicitly dealt with this. 
+ */ + + general_matrix_type C, tempm, G; + column_matrix_type L, tempv, w; + + // compute C and L + for (long i = 0; i < x.size(); ++i) + { + C += x(i)*trans(x(i)); + L += y(i)*x(i); + tempv += x(i); + } + + // Account for the extra 1 that we pretend is appended to x + // Make C = [C tempv + // tempv' x.size()] + C = join_cols(join_rows(C, tempv), + join_rows(trans(tempv), uniform_matrix<scalar_type>(1,1, x.size()))); + L = join_cols(L, uniform_matrix<scalar_type>(1,1, sum(y))); + + eigenvalue_decomposition<general_matrix_type> eig(make_symmetric(C)); + const general_matrix_type V = eig.get_pseudo_v(); + const column_matrix_type D = eig.get_real_eigenvalues(); + + // We can save some work by pre-multiplying the x vectors by trans(V) + // and saving the result so we don't have to recompute it over and over later. + matrix<column_matrix_type,0,1,mem_manager_type > Vx; + if (lambda == 0 || output_loo_values) + { + // Save the transpose of V into a temporary because the subsequent matrix + // vector multiplies will be faster (because of better cache locality). + const general_matrix_type transV( colm(trans(V),range(0,dims-1)) ); + // Remember the pretend 1 at the end of x(*). We want to multiply trans(V)*x(*) + // so to do this we pull the last column off trans(V) and store it separately. + const column_matrix_type lastV = colm(trans(V), dims); + Vx.set_size(x.size()); + for (long i = 0; i < x.size(); ++i) + { + Vx(i) = transV*x(i); + Vx(i) = squared(Vx(i) + lastV); + } + } + + the_lambda = lambda; + + // If we need to automatically select a lambda then do so using the LOOE trick described + // above. + bool did_loov = false; + scalar_type best_looe = std::numeric_limits<scalar_type>::max(); + if (lambda == 0) + { + did_loov = true; + + // Compute leave one out errors for a bunch of different lambdas and pick the best one. 
+ for (long idx = 0; idx < lams.size(); ++idx) + { + // first compute G + tempv = 1.0/(D + lams(idx)); + tempm = scale_columns(V,tempv); + G = tempm*trans(V); + + // compute the solution w for the current lambda + w = G*L; + + // make w have the same length as the x vectors. + const scalar_type b = w(dims); + w = colm(w,0,dims); + + scalar_type looe = 0; + for (long i = 0; i < x.size(); ++i) + { + // perform equivalent of: val = trans(x(i))*G*x(i); + const scalar_type val = dot(tempv, Vx(i)); + const scalar_type temp = (1 - val); + scalar_type loov; + if (temp != 0) + loov = (trans(w)*x(i) + b - y(i)*val) / temp; + else + loov = 0; + + looe += loss(loov, y(i)); + } + + // Keep track of the lambda which gave the lowest looe. If two lambdas + // have the same looe then pick the biggest lambda. + if (looe < best_looe || (looe == best_looe && lams(idx) > the_lambda)) + { + best_looe = looe; + the_lambda = lams(idx); + } + } + + best_looe /= x.size(); + } + + + + // Now perform the main training. That is, find w. + // first, compute G = inv(C + the_lambda*I) + tempv = 1.0/(D + the_lambda); + tempm = scale_columns(V,tempv); + G = tempm*trans(V); + w = G*L; + + // make w have the same length as the x vectors. + const scalar_type b = w(dims); + w = colm(w,0,dims); + + + // If we haven't done this already and we are supposed to then compute the LOO error rate for + // the current lambda and store the result in best_looe. 
+ if (output_loo_values) + { + loo_values.resize(x.size()); + did_loov = true; + best_looe = 0; + for (long i = 0; i < x.size(); ++i) + { + // perform equivalent of: val = trans(x(i))*G*x(i); + const scalar_type val = dot(tempv, Vx(i)); + const scalar_type temp = (1 - val); + scalar_type loov; + if (temp != 0) + loov = (trans(w)*x(i) + b - y(i)*val) / temp; + else + loov = 0; + + best_looe += loss(loov, y(i)); + loo_values[i] = loov; + } + + best_looe /= x.size(); + + } + else + { + loo_values.clear(); + } + + if (verbose && did_loov) + { + using namespace std; + cout << "Using lambda: " << the_lambda << endl; + if (use_regression_loss) + cout << "LOO Mean Squared Error: " << best_looe << endl; + else + cout << "LOO Classification Error: " << best_looe << endl; + } + + // convert w into a proper decision function + decision_function<kernel_type> df; + df.alpha.set_size(1); + df.alpha = 1; + df.basis_vectors.set_size(1); + df.basis_vectors(0) = w; + df.b = -b; // don't forget about the bias we stuck onto all the vectors + + return df; + } + + inline scalar_type loss ( + const scalar_type& a, + const scalar_type& b + ) const + { + if (use_regression_loss) + { + return (a-b)*(a-b); + } + else + { + // if a and b have the same sign then no loss + if (a*b >= 0) + return 0; + else + return 1; + } + } + + + /*! + CONVENTION + - get_lambda() == lambda + - get_kernel() == kernel_type() + - will_use_regression_loss_for_loo_cv() == use_regression_loss + - get_search_lambdas() == lams + !*/ + + bool verbose; + bool use_regression_loss; + + scalar_type lambda; + + matrix<scalar_type,0,0,mem_manager_type> lams; + }; + +} + +#endif // DLIB_RR_TRAInER_Hh_ + + diff --git a/ml/dlib/dlib/svm/rr_trainer_abstract.h b/ml/dlib/dlib/svm/rr_trainer_abstract.h new file mode 100644 index 000000000..f2fe21068 --- /dev/null +++ b/ml/dlib/dlib/svm/rr_trainer_abstract.h @@ -0,0 +1,255 @@ +// Copyright (C) 2010 Davis E. 
King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_RR_TRAInER_ABSTRACT_Hh_
+#ifdef DLIB_RR_TRAInER_ABSTRACT_Hh_
+
+#include "../algs.h"
+#include "function_abstract.h"
+
+namespace dlib
+{
+    template <
+        typename K
+        >
+    class rr_trainer
+    {
+        /*!
+            REQUIREMENTS ON K
+                is the dlib::linear_kernel instantiated with some kind of column vector.
+
+            INITIAL VALUE
+                - get_lambda() == 0
+                - will_use_regression_loss_for_loo_cv() == true
+                - get_search_lambdas() == logspace(-9, 2, 50)
+                - this object will not be verbose unless be_verbose() is called
+
+            WHAT THIS OBJECT REPRESENTS
+                This object represents a tool for performing linear ridge regression
+                (This basic algorithm is also known by many other names, e.g. regularized
+                least squares or least squares SVM).
+
+                The exact definition of what this algorithm does is this:
+                    Find w and b that minimizes the following (x_i are input samples and y_i are target values):
+                        lambda*dot(w,w) + sum_over_i( (f(x_i) - y_i)^2 )
+                        where f(x) == dot(x,w) - b
+
+                    So this algorithm is just regular old least squares regression but
+                    with the addition of a regularization term which encourages small w.
+
+
+                It is capable of estimating the lambda parameter using leave-one-out cross-validation.
+
+
+                The leave-one-out cross-validation implementation is based on the techniques
+                discussed in this paper:
+                    Notes on Regularized Least Squares by Ryan M. Rifkin and Ross A. Lippert.
+        !*/
+
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        rr_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to be used.
+        !*/
+
+        void be_verbose (
+        );
+        /*!
+            ensures
+                - This object will print status messages to standard out.
+ !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object. Since + the linear kernels don't have any parameters this function just + returns kernel_type() + !*/ + + void set_lambda ( + scalar_type lambda + ); + /*! + requires + - lambda >= 0 + ensures + - #get_lambda() == lambda + !*/ + + const scalar_type get_lambda ( + ) const; + /*! + ensures + - returns the regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data + exactly or allowing more errors but hopefully improving the + generalization ability of the resulting function. Smaller values + encourage exact fitting while larger values of lambda may encourage + better generalization. + + Note that a lambda of 0 has a special meaning. It indicates to this + object that it should automatically determine an appropriate lambda + value. This is done using leave-one-out cross-validation. + !*/ + + void use_regression_loss_for_loo_cv ( + ); + /*! + ensures + - #will_use_regression_loss_for_loo_cv() == true + !*/ + + void use_classification_loss_for_loo_cv ( + ); + /*! + ensures + - #will_use_regression_loss_for_loo_cv() == false + !*/ + + bool will_use_regression_loss_for_loo_cv ( + ) const; + /*! + ensures + - returns true if the automatic lambda estimation will attempt to estimate a lambda + appropriate for a regression task. Otherwise it will try and find one which + minimizes the number of classification errors. + !*/ + + template <typename EXP> + void set_search_lambdas ( + const matrix_exp<EXP>& lambdas + ); + /*! 
+ requires + - is_vector(lambdas) == true + - lambdas.size() > 0 + - min(lambdas) > 0 + - lambdas must contain floating point numbers + ensures + - #get_search_lambdas() == lambdas + !*/ + + const matrix<scalar_type,0,0,mem_manager_type>& get_search_lambdas ( + ) const; + /*! + ensures + - returns a matrix M such that: + - is_vector(M) == true + - M == a list of all the lambda values which will be tried when performing + LOO cross-validation for determining the best lambda. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + - is_learning_problem(x,y) == true + - if (get_lambda() == 0 && will_use_regression_loss_for_loo_cv() == false) then + - is_binary_classification_problem(x,y) == true + (i.e. if you want this algorithm to estimate a lambda appropriate for + classification functions then you had better give a valid classification + problem) + ensures + - performs linear ridge regression given the training samples in x and target values in y. + - returns a decision_function F with the following properties: + - F(new_x) == predicted y value + - F.alpha.size() == 1 + - F.basis_vectors.size() == 1 + - F.alpha(0) == 1 + + - if (get_lambda() == 0) then + - This object will perform internal leave-one-out cross-validation to determine an + appropriate lambda automatically. It will compute the LOO error for each lambda + in get_search_lambdas() and select the best one. + - if (will_use_regression_loss_for_loo_cv()) then + - the lambda selected will be the one that minimizes the mean squared error. 
+                    - else
+                        - the lambda selected will be the one that minimizes the number of classification
+                          mistakes.  We say a point is classified correctly if the output of the
+                          decision_function has the same sign as its label.
+                - #get_lambda() == 0
+                  (i.e. we don't change the get_lambda() value.  If you want to know what the
+                  automatically selected lambda value was then call the version of train()
+                  defined below)
+            - else
+                - The user supplied value of get_lambda() will be used to perform the ridge regression.
+    !*/
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y,
+            std::vector<scalar_type>& loo_values
+        ) const;
+        /*!
+            requires
+                - all the requirements for train(x,y) must be satisfied
+            ensures
+                - returns train(x,y)
+                  (i.e. executes train(x,y) and returns its result)
+                - #loo_values.size() == y.size()
+                - for all valid i:
+                    - #loo_values[i] == leave-one-out prediction for the value of y(i) based
+                      on all the training samples other than (x(i),y(i)).
+        !*/
+
+        template <
+            typename in_sample_vector_type,
+            typename in_scalar_vector_type
+            >
+        const decision_function<kernel_type> train (
+            const in_sample_vector_type& x,
+            const in_scalar_vector_type& y,
+            std::vector<scalar_type>& loo_values,
+            scalar_type& lambda_used
+        ) const;
+        /*!
+            requires
+                - all the requirements for train(x,y) must be satisfied
+            ensures
+                - returns train(x,y)
+                  (i.e. executes train(x,y) and returns its result)
+                - #loo_values.size() == y.size()
+                - for all valid i:
+                    - #loo_values[i] == leave-one-out prediction for the value of y(i) based
+                      on all the training samples other than (x(i),y(i)).
+                - #lambda_used == the value of lambda used to generate the
+                  decision_function.  Note that this lambda value is always
+                  equal to get_lambda() if get_lambda() isn't 0.
+ !*/ + + }; + +} + +#endif // DLIB_RR_TRAInER_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/rvm.h b/ml/dlib/dlib/svm/rvm.h new file mode 100644 index 000000000..e7ad495a2 --- /dev/null +++ b/ml/dlib/dlib/svm/rvm.h @@ -0,0 +1,1018 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_RVm_ +#define DLIB_RVm_ + +#include "rvm_abstract.h" +#include <cmath> +#include <limits> +#include "../matrix.h" +#include "../algs.h" +#include "function.h" +#include "kernel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace rvm_helpers + { + + // ------------------------------------------------------------------------------------ + + template <typename scalar_vector_type, typename mem_manager_type> + long find_next_best_alpha_to_update ( + const scalar_vector_type& S, + const scalar_vector_type& Q, + const scalar_vector_type& alpha, + const matrix<long,0,1,mem_manager_type>& active_bases, + const bool search_all_alphas, + typename scalar_vector_type::type eps + ) + /*! + ensures + - if (we can find another alpha to update) then + - returns the index of said alpha + - else + - returns -1 + !*/ + { + typedef typename scalar_vector_type::type scalar_type; + // now use S and Q to find next alpha to update. What + // we want to do here is select the alpha to update that gives us + // the greatest improvement in marginal likelihood. 
+ long selected_idx = -1; + scalar_type greatest_improvement = -1; + for (long i = 0; i < S.nr(); ++i) + { + scalar_type value = -1; + + // if i is currently in the active set + if (active_bases(i) >= 0) + { + const long idx = active_bases(i); + const scalar_type s = alpha(idx)*S(i)/(alpha(idx) - S(i)); + const scalar_type q = alpha(idx)*Q(i)/(alpha(idx) - S(i)); + + if (q*q-s > 0) + { + // only update an existing alpha if this is a narrow search + if (search_all_alphas == false) + { + // choosing this sample would mean doing an update of an + // existing alpha value. + scalar_type new_alpha = s*s/(q*q-s); + scalar_type cur_alpha = alpha(idx); + new_alpha = 1/new_alpha; + cur_alpha = 1/cur_alpha; + + // from equation 32 in the Tipping paper + value = Q(i)*Q(i)/(S(i) + 1/(new_alpha - cur_alpha) ) - + std::log(1 + S(i)*(new_alpha - cur_alpha)); + } + + } + // we only pick an alpha to remove if this is a wide search and it wasn't one of the recently added ones + else if (search_all_alphas && idx+2 < alpha.size() ) + { + // choosing this sample would mean the alpha value is infinite + // so we would remove the selected sample from our model. + + // from equation 37 in the Tipping paper + value = Q(i)*Q(i)/(S(i) - alpha(idx)) - + std::log(1-S(i)/alpha(idx)); + + } + } + else if (search_all_alphas) + { + const scalar_type s = S(i); + const scalar_type q = Q(i); + + if (q*q-s > 0) + { + // choosing this sample would mean we would add the selected + // sample to our model. + + // from equation 27 in the Tipping paper + value = (Q(i)*Q(i)-S(i))/S(i) + std::log(S(i)/(Q(i)*Q(i))); + } + } + + if (value > greatest_improvement) + { + greatest_improvement = value; + selected_idx = i; + } + } + + // If the greatest_improvement in marginal likelihood we would get is less + // than our epsilon then report that there isn't anything else to do. But + // if it is big enough then return the selected_idx. 
+ if (greatest_improvement > eps) + return selected_idx; + else + return -1; + } + + } // end namespace rvm_helpers + + // ------------------------------------------------------------------------------------ + + + template < + typename kern_type + > + class rvm_trainer + { + /*! + This is an implementation of the binary classifier version of the + relevance vector machine algorithm described in the paper: + Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation + for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings + of the Ninth International Workshop on Artificial Intelligence and Statistics, + Key West, FL, Jan 3-6. + + This code mostly does what is described in the above paper with the exception + that here we use a different stopping condition as well as a modified alpha + selection rule. See the code for the exact details. + !*/ + + public: + typedef kern_type kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + rvm_trainer ( + ) : eps(0.001), max_iterations(2000) + { + } + + void set_max_iterations ( + unsigned long max_iterations_ + ) + { + max_iterations = max_iterations_; + } + + unsigned long get_max_iterations ( + ) const + { + return max_iterations; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\tvoid rvm_trainer::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps: " << eps_ + ); + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const + { + return eps; + } + + void set_kernel ( + const kernel_type& k + ) + { + kernel = k; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + 
> + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + return do_train(mat(x), mat(y)); + } + + void swap ( + rvm_trainer& item + ) + { + exchange(kernel, item.kernel); + exchange(eps, item.eps); + } + + private: + + // ------------------------------------------------------------------------------------ + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y) == true, + "\tdecision_function rvm_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? "true":"false") + ); + + // make a target vector where +1 examples have value 1 and -1 examples + // have a value of 0. + scalar_vector_type t(y.size()); + for (long i = 0; i < y.size(); ++i) + { + if (y(i) == 1) + t(i) = 1; + else + t(i) = 0; + } + + /*! This is the convention for the active_bases variable in the function: + - if (active_bases(i) >= 0) then + - alpha(active_bases(i)) == the alpha value associated with sample x(i) + - weights(active_bases(i)) == the weight value associated with sample x(i) + - colm(phi, active_bases(i)) == the column of phi associated with sample x(i) + - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum()) + - else + - the i'th sample isn't in the model and notionally has an alpha of infinity and + a weight of 0. 
            !*/
            // Per-sample map into the active model; -1 means "not in the model".
            matrix<long,0,1,mem_manager_type> active_bases(x.nr());
            // Design matrix: one kernel column per active basis vector.
            scalar_matrix_type phi(x.nr(),1);
            scalar_vector_type alpha(1), prev_alpha;
            scalar_vector_type weights(1), prev_weights;

            scalar_vector_type tempv, K_col;

            // set the initial values of these guys.  The model starts with a single
            // basis vector: the kernel column with the largest normalized projection
            // onto the target vector t (see pick_initial_vector()).
            set_all_elements(active_bases, -1);
            long first_basis = pick_initial_vector(x,t);
            get_kernel_colum(first_basis, x, K_col);
            active_bases(first_basis) = 0;
            set_colm(phi,0) = K_col;
            alpha(0) = compute_initial_alpha(phi, t);
            weights(0) = 1;


            // now declare a bunch of other variables we will be using below
            scalar_vector_type mu, t_hat, Q, S;
            scalar_matrix_type sigma;

            matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3;
            scalar_matrix_type tempm;

            scalar_vector_type t_estimate;
            scalar_vector_type beta;


            // S and Q are the "sparsity" and "quality" statistics, one per sample.
            Q.set_size(x.nr());
            S.set_size(x.nr());

            // Set whenever the number of basis vectors changes so beta gets rebuilt.
            bool recompute_beta = true;

            // Alternate between cheap searches over only the active set and
            // occasional full searches over all samples (every
            // rounds_of_narrow_search iterations).
            bool search_all_alphas = false;
            unsigned long ticker = 0;
            const unsigned long rounds_of_narrow_search = 100;
            unsigned long iterations = 0;

            while (iterations != max_iterations)
            {
                iterations++;
                if (recompute_beta)
                {
                    // calculate the current t_estimate. (this is the predicted t value for each sample according to the
                    // current state of the classifier)
                    t_estimate = phi*weights;

                    // calculate the current beta, i.e. y*(1-y) for the sigmoid
                    // output y -- the IRLS weighting for each sample.
                    beta = sigmoid(t_estimate);
                    beta = pointwise_multiply(beta,(uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta));
                    recompute_beta = false;
                }

                // Compute optimal weights and sigma for current alpha using IRLS.  This is the same
                // technique documented in the paper by equations 12-14.
                scalar_type weight_delta = std::numeric_limits<scalar_type>::max();
                int count = 0;
                // Iterate the reweighted least squares update until the weights
                // stop changing (infinity-norm change below 0.0001).
                while (weight_delta > 0.0001)
                {
                    // This is a sanity check to make sure we never get stuck in this
                    // loop due to some degenerate numerical condition
                    ++count;
                    if (count > 100)
                    {
                        // jump us to where search_all_alphas will be set to true
                        ticker = rounds_of_narrow_search;
                        break;
                    }

                    // compute the updated sigma matrix, i.e. inv(trans(phi)*B*phi + diag(alpha))
                    sigma = scale_columns(trans(phi),beta)*phi;
                    for (long r = 0; r < alpha.nr(); ++r)
                        sigma(r,r) += alpha(r);
                    sigma = inv(sigma);


                    // compute the updated weights vector (t_hat = phi*mu_mp + inv(B)*(t-y))
                    t_hat = t_estimate + trans(scale_columns(trans(t-sigmoid(t_estimate)),reciprocal(beta)));

                    // mu = sigma*trans(phi)*b*t_hat
                    mu = sigma*tmp(trans(phi)* trans(scale_columns(trans(t_hat), beta)));

                    // now compute how much the weights vector changed during this iteration
                    // through this loop.
                    weight_delta = max(abs(mu-weights));

                    // put mu into the weights vector
                    mu.swap(weights);

                    // calculate the current t_estimate
                    t_estimate = phi*weights;

                    // calculate the current beta
                    beta = sigmoid(t_estimate);
                    beta = pointwise_multiply(beta, uniform_matrix<scalar_type>(beta.nr(),beta.nc(),1)-beta);

                }

                // check if we should do a full search for the best alpha to optimize
                if (ticker >= rounds_of_narrow_search)
                {
                    // if the current alpha and weights are equal to what they were
                    // at the last time we were about to start a wide search then
                    // we are done.
                    if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps))
                        break;


                    // remember the state at this wide-search checkpoint so the
                    // convergence test above can compare against it next time.
                    prev_alpha = alpha;
                    prev_weights = weights;
                    search_all_alphas = true;
                    ticker = 0;
                }
                else
                {
                    search_all_alphas = false;
                }
                ++ticker;

                // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t_hat)
                tempv = phi*tmp(sigma*tmp(trans(phi)*trans(scale_columns(trans(t_hat),beta))));
                for (long i = 0; i < S.size(); ++i)
                {
                    // if we are currently limiting the search for the next alpha to update
                    // to the set in the active set then skip a non-active vector.
                    if (search_all_alphas == false && active_bases(i) == -1)
                        continue;

                    // get the column for this sample out of the kernel matrix.  If it is 
                    // something in the active set then just get it right out of phi, otherwise 
                    // we have to calculate it.
                    if (active_bases(i) != -1)
                        K_col = colm(phi,active_bases(i));
                    else
                        get_kernel_colum(i, x, K_col);

                    // tempv2 = trans(phi_m)*B
                    tempv2 = scale_columns(trans(K_col), beta);
                    tempv3 = tempv2*phi;
                    S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3);
                    Q(i) = tempv2*t_hat - tempv2*tempv;
                }

                // Pick the basis whose alpha update most improves the marginal
                // likelihood (restricted to the active set unless search_all_alphas).
                const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps);


                // if find_next_best_alpha_to_update didn't find any good alpha to update
                if (selected_idx == -1)
                {
                    if (search_all_alphas == false)
                    {
                        // jump us to where search_all_alphas will be set to true and try again
                        ticker = rounds_of_narrow_search;
                        continue;
                    }
                    else
                    {
                        // we are really done so quit the main loop
                        break;
                    }
                }


                // next we update the selected alpha.

                // if the selected alpha is in the active set
                if (active_bases(selected_idx) >= 0)
                {
                    const long idx = active_bases(selected_idx);
                    // s and q here are the "leave this basis out" versions of
                    // S and Q (equations 23 in the paper use this same rescaling).
                    const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx));
                    const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx));

                    if (q*q-s > 0)
                    {
                        // reestimate the value of alpha
                        alpha(idx) = s*s/(q*q-s);

                    }
                    else
                    {
                        // the new alpha value is infinite so remove the selected alpha from our model
                        active_bases(selected_idx) = -1;
                        phi = remove_col(phi, idx);
                        weights = remove_row(weights, idx);
                        alpha = remove_row(alpha, idx);

                        // fix the index values in active_bases since removing a
                        // column/row shifted everything after idx down by one.
                        for (long i = 0; i < active_bases.size(); ++i)
                        {
                            if (active_bases(i) > idx)
                            {
                                active_bases(i) -= 1;
                            }
                        }

                        // we changed the number of weights so we need to remember to 
                        // recompute the beta vector next time around the main loop.
                        recompute_beta = true;
                    }
                }
                else
                {
                    const scalar_type s = S(selected_idx);
                    const scalar_type q = Q(selected_idx);

                    if (q*q-s > 0)
                    {
                        // add the selected alpha to our model

                        active_bases(selected_idx) = phi.nc();

                        // update alpha (grow it by one and append the new value)
                        tempv.set_size(alpha.size()+1);
                        set_subm(tempv, get_rect(alpha)) = alpha;
                        tempv(phi.nc()) = s*s/(q*q-s);
                        tempv.swap(alpha);

                        // update weights (the new basis starts with weight 0)
                        tempv.set_size(weights.size()+1);
                        set_subm(tempv, get_rect(weights)) = weights;
                        tempv(phi.nc()) = 0;
                        tempv.swap(weights);

                        // update phi by adding the new sample's kernel matrix column in as one of phi's columns
                        tempm.set_size(phi.nr(), phi.nc()+1);
                        set_subm(tempm, get_rect(phi)) = phi;
                        get_kernel_colum(selected_idx, x, K_col);
                        set_colm(tempm, phi.nc()) = K_col;
                        tempm.swap(phi);


                        // we changed the number of weights so we need to remember to 
                        // recompute the beta vector next time around the main loop.
                        recompute_beta = true;
                    }
                }

            } // end while(true).  So we have converged on the final answer.


            // now put everything into a decision_function object and return it.
            // Only samples that survived in the active set become support
            // ("relevance") vectors.
            std_vector_c<sample_type> dictionary;
            std_vector_c<scalar_type> final_weights;
            for (long i = 0; i < active_bases.size(); ++i)
            {
                if (active_bases(i) >= 0)
                {
                    dictionary.push_back(x(i));
                    final_weights.push_back(weights(active_bases(i)));
                }
            }

            // The -sum(weights)*tau bias compensates for the tau offset that
            // get_kernel_colum() adds to every kernel value.
            return decision_function<kernel_type> ( mat(final_weights),
                                                    -sum(mat(final_weights))*tau,
                                                    kernel,
                                                    mat(dictionary));

        }

    // ------------------------------------------------------------------------------------

        template <typename M1, typename M2>
        long pick_initial_vector (
            const M1& x,
            const M2& t
        ) const
        /*!
            ensures
                - returns the index of the sample whose kernel column has the
                  largest normalized projection onto the target vector t.
        !*/
        {
            scalar_vector_type K_col;
            double max_projection = -std::numeric_limits<scalar_type>::infinity();
            long max_idx = 0;
            // find the row in the kernel matrix that has the biggest normalized projection onto the t vector
            for (long r = 0; r < x.nr(); ++r)
            {
                get_kernel_colum(r,x,K_col);
                double temp = trans(K_col)*t;
                temp = temp*temp/length_squared(K_col);

                if (temp > max_projection)
                {
                    max_projection = temp;
                    max_idx = r;
                }
            }

            return max_idx;
        }

    // ------------------------------------------------------------------------------------

        template <typename T>
        void get_kernel_colum (
            long idx,
            const T& x,
            scalar_vector_type& col
        ) const
        /*!
            ensures
                - #col == the idx'th column of the kernel matrix of x, with tau
                  added to every element.
        !*/
        {
            col.set_size(x.nr());
            for (long i = 0; i < col.size(); ++i)
            {
                col(i) = kernel(x(idx), x(i)) + tau;
            }
        }

    // ------------------------------------------------------------------------------------

        template <typename M1, typename M2>
        scalar_type compute_initial_alpha (
            const M1& phi,
            const M2& t
        ) const
        /*!
            ensures
                - returns the starting alpha value for the first basis vector phi
                  given the targets t.
        !*/
        {
            const double temp = length_squared(phi);
            const double temp2 = trans(phi)*t;

            return temp/( temp2*temp2/temp + variance(t)*0.1);
        }

    // ------------------------------------------------------------------------------------

        // private member variables
        kernel_type kernel;
        scalar_type eps;
        unsigned
long max_iterations; + + const static scalar_type tau; + + }; // end of class rvm_trainer + + template <typename kernel_type> + const typename kernel_type::scalar_type rvm_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001); + +// ---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + rvm_trainer<K>& a, + rvm_trainer<K>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename kern_type + > + class rvm_regression_trainer + { + /*! + This is an implementation of the regression version of the + relevance vector machine algorithm described in the paper: + Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation + for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings + of the Ninth International Workshop on Artificial Intelligence and Statistics, + Key West, FL, Jan 3-6. + + This code mostly does what is described in the above paper with the exception + that here we use a different stopping condition as well as a modified alpha + selection rule. See the code for the exact details. 
+ !*/ + + public: + typedef kern_type kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + rvm_regression_trainer ( + ) : eps(0.001) + { + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\tvoid rvm_regression_trainer::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps: " << eps_ + ); + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const + { + return eps; + } + + void set_kernel ( + const kernel_type& k + ) + { + kernel = k; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& t + ) const + { + return do_train(mat(x), mat(t)); + } + + void swap ( + rvm_regression_trainer& item + ) + { + exchange(kernel, item.kernel); + exchange(eps, item.eps); + } + + private: + + // ------------------------------------------------------------------------------------ + + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + typedef matrix<scalar_type,0,0,mem_manager_type> scalar_matrix_type; + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& t + ) const + { + + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,t) && x.size() > 0, + "\tdecision_function rvm_regression_trainer::train(x,t)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t t.nr(): " << t.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t 
t.nc(): " << t.nc() + ); + + + /*! This is the convention for the active_bases variable in the function: + - if (active_bases(i) >= 0) then + - alpha(active_bases(i)) == the alpha value associated with sample x(i) + - weights(active_bases(i)) == the weight value associated with sample x(i) + - colm(phi, active_bases(i)) == the column of phi associated with sample x(i) + - colm(phi, active_bases(i)) == kernel column i (from get_kernel_colum()) + - else + - the i'th sample isn't in the model and notionally has an alpha of infinity and + a weight of 0. + !*/ + matrix<long,0,1,mem_manager_type> active_bases(x.nr()); + scalar_matrix_type phi(x.nr(),1); + scalar_vector_type alpha(1), prev_alpha; + scalar_vector_type weights(1), prev_weights; + + scalar_vector_type tempv, K_col; + scalar_type var = variance(t)*0.1; + + // set the initial values of these guys + set_all_elements(active_bases, -1); + long first_basis = pick_initial_vector(x,t); + get_kernel_colum(first_basis, x, K_col); + active_bases(first_basis) = 0; + set_colm(phi,0) = K_col; + alpha(0) = compute_initial_alpha(phi, t, var); + weights(0) = 1; + + + // now declare a bunch of other variables we will be using below + scalar_vector_type Q, S; + scalar_matrix_type sigma; + + matrix<scalar_type,1,0,mem_manager_type> tempv2, tempv3; + scalar_matrix_type tempm; + + + Q.set_size(x.nr()); + S.set_size(x.nr()); + + + bool search_all_alphas = false; + unsigned long ticker = 0; + const unsigned long rounds_of_narrow_search = 100; + + while (true) + { + // Compute optimal weights and sigma for current alpha using equation 6. 
+ sigma = trans(phi)*phi/var; + for (long r = 0; r < alpha.nr(); ++r) + sigma(r,r) += alpha(r); + sigma = inv(sigma); + weights = sigma*trans(phi)*t/var; + + + + // check if we should do a full search for the best alpha to optimize + if (ticker == rounds_of_narrow_search) + { + // if the current alpha and weights are equal to what they were + // at the last time we were about to start a wide search then + // we are done. + if (equal(prev_alpha, alpha, eps) && equal(prev_weights, weights, eps)) + break; + + prev_alpha = alpha; + prev_weights = weights; + search_all_alphas = true; + ticker = 0; + } + else + { + search_all_alphas = false; + } + ++ticker; + + // compute S and Q using equations 24 and 25 (tempv = phi*sigma*trans(phi)*B*t) + tempv = phi*tmp(sigma*tmp(trans(phi)*t/var)); + for (long i = 0; i < S.size(); ++i) + { + // if we are currently limiting the search for the next alpha to update + // to the set in the active set then skip a non-active vector. + if (search_all_alphas == false && active_bases(i) == -1) + continue; + + // get the column for this sample out of the kernel matrix. If it is + // something in the active set then just get it right out of phi, otherwise + // we have to calculate it. 
+ if (active_bases(i) != -1) + K_col = colm(phi,active_bases(i)); + else + get_kernel_colum(i, x, K_col); + + // tempv2 = trans(phi_m)*B + tempv2 = trans(K_col)/var; + tempv3 = tempv2*phi; + S(i) = tempv2*K_col - tempv3*sigma*trans(tempv3); + Q(i) = tempv2*t - tempv2*tempv; + } + + const long selected_idx = rvm_helpers::find_next_best_alpha_to_update(S,Q,alpha,active_bases, search_all_alphas, eps); + + // if find_next_best_alpha_to_update didn't find any good alpha to update + if (selected_idx == -1) + { + if (search_all_alphas == false) + { + // jump us to where search_all_alphas will be set to true and try again + ticker = rounds_of_narrow_search; + continue; + } + else + { + // we are really done so quit the main loop + break; + } + } + + // recompute the variance + var = length_squared(t - phi*weights)/(x.nr() - weights.size() + trans(alpha)*diag(sigma)); + + // next we update the selected alpha. + + // if the selected alpha is in the active set + if (active_bases(selected_idx) >= 0) + { + const long idx = active_bases(selected_idx); + const scalar_type s = alpha(idx)*S(selected_idx)/(alpha(idx) - S(selected_idx)); + const scalar_type q = alpha(idx)*Q(selected_idx)/(alpha(idx) - S(selected_idx)); + + if (q*q-s > 0) + { + // reestimate the value of alpha + alpha(idx) = s*s/(q*q-s); + + } + else + { + // the new alpha value is infinite so remove the selected alpha from our model + active_bases(selected_idx) = -1; + phi = remove_col(phi, idx); + weights = remove_row(weights, idx); + alpha = remove_row(alpha, idx); + + // fix the index values in active_bases + for (long i = 0; i < active_bases.size(); ++i) + { + if (active_bases(i) > idx) + { + active_bases(i) -= 1; + } + } + } + } + else + { + const scalar_type s = S(selected_idx); + const scalar_type q = Q(selected_idx); + + if (q*q-s > 0) + { + // add the selected alpha to our model + + active_bases(selected_idx) = phi.nc(); + + // update alpha + tempv.set_size(alpha.size()+1); + set_subm(tempv, get_rect(alpha)) 
= alpha; + tempv(phi.nc()) = s*s/(q*q-s); + tempv.swap(alpha); + + // update weights + tempv.set_size(weights.size()+1); + set_subm(tempv, get_rect(weights)) = weights; + tempv(phi.nc()) = 0; + tempv.swap(weights); + + // update phi by adding the new sample's kernel matrix column in as one of phi's columns + tempm.set_size(phi.nr(), phi.nc()+1); + set_subm(tempm, get_rect(phi)) = phi; + get_kernel_colum(selected_idx, x, K_col); + set_colm(tempm, phi.nc()) = K_col; + tempm.swap(phi); + + } + } + + + + } // end while(true). So we have converged on the final answer. + + + // now put everything into a decision_function object and return it + std_vector_c<sample_type> dictionary; + std_vector_c<scalar_type> final_weights; + for (long i = 0; i < active_bases.size(); ++i) + { + if (active_bases(i) >= 0) + { + dictionary.push_back(x(i)); + final_weights.push_back(weights(active_bases(i))); + } + } + + return decision_function<kernel_type> ( mat(final_weights), + -sum(mat(final_weights))*tau, + kernel, + mat(dictionary)); + + } + + // ------------------------------------------------------------------------------------ + + template <typename T> + void get_kernel_colum ( + long idx, + const T& x, + scalar_vector_type& col + ) const + { + col.set_size(x.nr()); + for (long i = 0; i < col.size(); ++i) + { + col(i) = kernel(x(idx), x(i)) + tau; + } + } + + // ------------------------------------------------------------------------------------ + + template <typename M1, typename M2> + scalar_type compute_initial_alpha ( + const M1& phi, + const M2& t, + const scalar_type& var + ) const + { + const double temp = length_squared(phi); + const double temp2 = trans(phi)*t; + + return temp/( temp2*temp2/temp + var); + } + + // ------------------------------------------------------------------------------------ + + template <typename M1, typename M2> + long pick_initial_vector ( + const M1& x, + const M2& t + ) const + { + scalar_vector_type K_col; + double max_projection = 
-std::numeric_limits<scalar_type>::infinity(); + long max_idx = 0; + // find the row in the kernel matrix that has the biggest normalized projection onto the t vector + for (long r = 0; r < x.nr(); ++r) + { + get_kernel_colum(r,x,K_col); + double temp = trans(K_col)*t; + temp = temp*temp/length_squared(K_col); + + if (temp > max_projection) + { + max_projection = temp; + max_idx = r; + } + } + + return max_idx; + } + + // ------------------------------------------------------------------------------------ + + // private member variables + kernel_type kernel; + scalar_type eps; + + const static scalar_type tau; + + }; // end of class rvm_regression_trainer + + template <typename kernel_type> + const typename kernel_type::scalar_type rvm_regression_trainer<kernel_type>::tau = static_cast<typename kernel_type::scalar_type>(0.001); + +// ---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + rvm_regression_trainer<K>& a, + rvm_regression_trainer<K>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RVm_ + + diff --git a/ml/dlib/dlib/svm/rvm_abstract.h b/ml/dlib/dlib/svm/rvm_abstract.h new file mode 100644 index 000000000..236d2ad3c --- /dev/null +++ b/ml/dlib/dlib/svm/rvm_abstract.h @@ -0,0 +1,278 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_RVm_ABSTRACT_ +#ifdef DLIB_RVm_ABSTRACT_ + +#include <cmath> +#include <limits> +#include "../matrix.h" +#include "../algs.h" +#include "function.h" +#include "kernel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kern_type + > + class rvm_trainer + { + /*! 
+ REQUIREMENTS ON kern_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for a relevance vector machine for + solving binary classification problems. + + The implementation of the RVM training algorithm used by this object is based + on the following excellent paper: + Tipping, M. E. and A. C. Faul (2003). Fast marginal likelihood maximisation + for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings + of the Ninth International Workshop on Artificial Intelligence and Statistics, + Key West, FL, Jan 3-6. + !*/ + + public: + typedef kern_type kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + rvm_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used + to train a relevance vector machine. + - #get_epsilon() == 0.001 + - #get_max_iterations() == 2000 + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Generally a good value for this is 0.001. Smaller values may result + in a more accurate solution but take longer to execute. + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations the RVM optimizer is allowed to + run before it is required to stop and return a result. 
+ !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - is_binary_classification_problem(x,y) == true + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + ensures + - trains a relevance vector classifier given the training samples in x and + labels in y. + - returns a decision function F with the following properties: + - if (new_x is a sample predicted have +1 label) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + throws + - std::bad_alloc + !*/ + + void swap ( + rvm_trainer& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + rvm_trainer<K>& a, + rvm_trainer<K>& b + ) { a.swap(b); } + /*! + provides a global swap + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename kern_type + > + class rvm_regression_trainer + { + /*! + REQUIREMENTS ON kern_type + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for a relevance vector machine for + solving regression problems. + + The implementation of the RVM training algorithm used by this object is based + on the following excellent paper: + Tipping, M. E. and A. C. 
Faul (2003). Fast marginal likelihood maximisation + for sparse Bayesian models. In C. M. Bishop and B. J. Frey (Eds.), Proceedings + of the Ninth International Workshop on Artificial Intelligence and Statistics, + Key West, FL, Jan 3-6. + !*/ + + public: + typedef kern_type kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + rvm_regression_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used + to train a relevance vector machine. + - #get_epsilon() == 0.001 + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Generally a good value for this is 0.001. Smaller values may result + in a more accurate solution but take longer to execute. + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + - is_learning_problem(x,y) == true + - x.size() > 0 + ensures + - trains a RVM given the training samples in x and + labels in y and returns the resulting decision_function. 
+ throws + - std::bad_alloc + !*/ + + void swap ( + rvm_regression_trainer& item + ); + /*! + ensures + - swaps *this and item + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + rvm_regression_trainer<K>& a, + rvm_regression_trainer<K>& b + ) { a.swap(b); } + /*! + provides a global swap + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_RVm_ABSTRACT_ + diff --git a/ml/dlib/dlib/svm/sequence_labeler.h b/ml/dlib/dlib/svm/sequence_labeler.h new file mode 100644 index 000000000..882cdb881 --- /dev/null +++ b/ml/dlib/dlib/svm/sequence_labeler.h @@ -0,0 +1,339 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SEQUENCE_LAbELER_H_h_ +#define DLIB_SEQUENCE_LAbELER_H_h_ + +#include "sequence_labeler_abstract.h" +#include "../matrix.h" +#include <vector> +#include "../optimization/find_max_factor_graph_viterbi.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace fe_helpers + { + template <typename EXP> + struct dot_functor + { + dot_functor(const matrix_exp<EXP>& lambda_) : lambda(lambda_), value(0) {} + + inline void operator() ( + unsigned long feat_index + ) + { + value += lambda(feat_index); + } + + inline void operator() ( + unsigned long feat_index, + double feat_value + ) + { + value += feat_value*lambda(feat_index); + } + + const matrix_exp<EXP>& lambda; + double value; + }; + + template <typename feature_extractor, typename EXP, typename sequence_type, typename EXP2> + double dot( + const matrix_exp<EXP>& lambda, + const feature_extractor& fe, + const sequence_type& sequence, + const matrix_exp<EXP2>& candidate_labeling, + unsigned long position + ) + { + dot_functor<EXP> dot(lambda); + fe.get_features(dot, sequence, 
candidate_labeling, position); + return dot.value; + } + + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + DLIB_MAKE_HAS_MEMBER_FUNCTION_TEST( + has_reject_labeling, + bool, + template reject_labeling<matrix<unsigned long> >, + (const typename T::sequence_type&, const matrix_exp<matrix<unsigned long> >&, unsigned long)const + ); + + template <typename feature_extractor, typename EXP, typename sequence_type> + typename enable_if<has_reject_labeling<feature_extractor>,bool>::type call_reject_labeling_if_exists ( + const feature_extractor& fe, + const sequence_type& x, + const matrix_exp<EXP>& y, + unsigned long position + ) + { + return fe.reject_labeling(x, y, position); + } + + template <typename feature_extractor, typename EXP, typename sequence_type> + typename disable_if<has_reject_labeling<feature_extractor>,bool>::type call_reject_labeling_if_exists ( + const feature_extractor& , + const sequence_type& , + const matrix_exp<EXP>& , + unsigned long + ) + { + return false; + } + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + typename enable_if<dlib::impl::has_reject_labeling<feature_extractor>,bool>::type contains_invalid_labeling ( + const feature_extractor& fe, + const typename feature_extractor::sequence_type& x, + const std::vector<unsigned long>& y + ) + { + if (x.size() != y.size()) + return true; + + matrix<unsigned long,0,1> node_states; + + for (unsigned long i = 0; i < x.size(); ++i) + { + node_states.set_size(std::min(fe.order(),i) + 1); + for (unsigned long j = 0; j < (unsigned long)node_states.size(); ++j) + node_states(j) = y[i-j]; + + if (fe.reject_labeling(x, node_states, i)) + return true; + } + + return false; + } + +// 
---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + typename disable_if<dlib::impl::has_reject_labeling<feature_extractor>,bool>::type contains_invalid_labeling ( + const feature_extractor& , + const typename feature_extractor::sequence_type& x, + const std::vector<unsigned long>& y + ) + { + if (x.size() != y.size()) + return true; + + return false; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + bool contains_invalid_labeling ( + const feature_extractor& fe, + const std::vector<typename feature_extractor::sequence_type>& x, + const std::vector<std::vector<unsigned long> >& y + ) + { + if (x.size() != y.size()) + return true; + + for (unsigned long i = 0; i < x.size(); ++i) + { + if (contains_invalid_labeling(fe,x[i],y[i])) + return true; + } + return false; + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class sequence_labeler + { + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<unsigned long> labeled_sequence_type; + + private: + class map_prob + { + public: + unsigned long order() const { return fe.order(); } + unsigned long num_states() const { return fe.num_labels(); } + + map_prob( + const sample_sequence_type& x_, + const feature_extractor& fe_, + const matrix<double,0,1>& weights_ + ) : + sequence(x_), + fe(fe_), + weights(weights_) + { + } + + unsigned long number_of_nodes( + ) const + { + return sequence.size(); + } + + template < + typename EXP + > + double factor_value ( + unsigned long node_id, + const matrix_exp<EXP>& node_states + ) const + { + if (dlib::impl::call_reject_labeling_if_exists(fe, sequence, node_states, 
node_id)) + return -std::numeric_limits<double>::infinity(); + + return fe_helpers::dot(weights, fe, sequence, node_states, node_id); + } + + const sample_sequence_type& sequence; + const feature_extractor& fe; + const matrix<double,0,1>& weights; + }; + public: + + sequence_labeler() + { + weights.set_size(fe.num_features()); + weights = 0; + } + + explicit sequence_labeler( + const matrix<double,0,1>& weights_ + ) : + weights(weights_) + { + // make sure requires clause is not broken + DLIB_ASSERT(fe.num_features() == static_cast<unsigned long>(weights_.size()), + "\t sequence_labeler::sequence_labeler(weights_)" + << "\n\t These sizes should match" + << "\n\t fe.num_features(): " << fe.num_features() + << "\n\t weights_.size(): " << weights_.size() + << "\n\t this: " << this + ); + } + + sequence_labeler( + const matrix<double,0,1>& weights_, + const feature_extractor& fe_ + ) : + fe(fe_), + weights(weights_) + { + // make sure requires clause is not broken + DLIB_ASSERT(fe_.num_features() == static_cast<unsigned long>(weights_.size()), + "\t sequence_labeler::sequence_labeler(weights_,fe_)" + << "\n\t These sizes should match" + << "\n\t fe_.num_features(): " << fe_.num_features() + << "\n\t weights_.size(): " << weights_.size() + << "\n\t this: " << this + ); + } + + const feature_extractor& get_feature_extractor ( + ) const { return fe; } + + const matrix<double,0,1>& get_weights ( + ) const { return weights; } + + unsigned long num_labels ( + ) const { return fe.num_labels(); } + + labeled_sequence_type operator() ( + const sample_sequence_type& x + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(num_labels() > 0, + "\t labeled_sequence_type sequence_labeler::operator()(x)" + << "\n\t You can't have no labels." 
+ << "\n\t this: " << this + ); + + labeled_sequence_type y; + find_max_factor_graph_viterbi(map_prob(x,fe,weights), y); + return y; + } + + void label_sequence ( + const sample_sequence_type& x, + labeled_sequence_type& y + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(num_labels() > 0, + "\t void sequence_labeler::label_sequence(x,y)" + << "\n\t You can't have no labels." + << "\n\t this: " << this + ); + + find_max_factor_graph_viterbi(map_prob(x,fe,weights), y); + } + + private: + + feature_extractor fe; + matrix<double,0,1> weights; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void serialize ( + const sequence_labeler<feature_extractor>& item, + std::ostream& out + ) + { + serialize(item.get_feature_extractor(), out); + serialize(item.get_weights(), out); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void deserialize ( + sequence_labeler<feature_extractor>& item, + std::istream& in + ) + { + feature_extractor fe; + matrix<double,0,1> weights; + + deserialize(fe, in); + deserialize(weights, in); + + item = sequence_labeler<feature_extractor>(weights, fe); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SEQUENCE_LAbELER_H_h_ + diff --git a/ml/dlib/dlib/svm/sequence_labeler_abstract.h b/ml/dlib/dlib/svm/sequence_labeler_abstract.h new file mode 100644 index 000000000..3970b723a --- /dev/null +++ b/ml/dlib/dlib/svm/sequence_labeler_abstract.h @@ -0,0 +1,396 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_SEQUENCE_LAbELER_ABSTRACT_H_h_ +#ifdef DLIB_SEQUENCE_LAbELER_ABSTRACT_H_h_ + +#include "../matrix.h" +#include <vector> +#include "../optimization/find_max_factor_graph_viterbi_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class example_feature_extractor + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the interface a feature extractor must implement + if it is to be used with the sequence_labeler defined at the bottom + of this file. + + The model used by sequence_labeler objects is the following. + Given an input sequence x, predict an output label sequence y + such that: + y == argmax_Y dot(w, PSI(x,Y)) + Where w is a parameter vector. + + Therefore, a feature extractor defines how the PSI(x,y) feature vector + is calculated. It also defines how many output labels there are as + well as the order of the model. + + Finally, note that PSI(x,y) is a sum of feature vectors, each derived + from the entire input sequence x but only part of the label sequence y. + Each of these constituent feature vectors is defined by the get_features() + method of this class. + + THREAD SAFETY + Instances of this object are required to be threadsafe, that is, it should + be safe for multiple threads to make concurrent calls to the member + functions of this object. + !*/ + + public: + // This should be the type used to represent an input sequence. It can be + // anything so long as it has a .size() which returns the length of the sequence. + typedef the_type_used_to_represent_a_sequence sequence_type; + + example_feature_extractor ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + unsigned long num_features ( + ) const; + /*! + ensures + - returns the dimensionality of the PSI() feature vector. + !*/ + + unsigned long order( + ) const; + /*! + ensures + - This object represents a Markov model on the output labels. 
+ This parameter defines the order of the model. That is, this + value controls how many previous label values get to be taken + into consideration when performing feature extraction for a + particular element of the input sequence. Note that the runtime + of the algorithm is exponential in the order. So don't make order + very large. + !*/ + + unsigned long num_labels( + ) const; + /*! + ensures + - returns the number of possible output labels. + !*/ + + template <typename EXP> + bool reject_labeling ( + const sequence_type& x, + const matrix_exp<EXP>& y, + unsigned long position + ) const; + /*! + requires + - EXP::type == unsigned long + (i.e. y contains unsigned longs) + - position < x.size() + - y.size() == min(position, order()) + 1 + - is_vector(y) == true + - max(y) < num_labels() + ensures + - for all valid i: + - interprets y(i) as the label corresponding to x[position-i] + - if (the labeling in y for x[position] is always the wrong labeling) then + - returns true + (note that reject_labeling() is just an optional tool to allow you + to overrule the normal labeling algorithm. You don't have to use + it. So if you don't include a reject_labeling() method in your + feature extractor it is the same as including one that always + returns false.) + - else + - returns false + !*/ + + template <typename feature_setter, typename EXP> + void get_features ( + feature_setter& set_feature, + const sequence_type& x, + const matrix_exp<EXP>& y, + unsigned long position + ) const; + /*! + requires + - EXP::type == unsigned long + (i.e. 
y contains unsigned longs) + - reject_labeling(x,y,position) == false + - position < x.size() + - y.size() == min(position, order()) + 1 + - is_vector(y) == true + - max(y) < num_labels() + - set_feature is a function object which allows expressions of the form: + - set_features((unsigned long)feature_index, (double)feature_value); + - set_features((unsigned long)feature_index); + ensures + - for all valid i: + - interprets y(i) as the label corresponding to x[position-i] + - This function computes the part of PSI() corresponding to the x[position] + element of the input sequence. Moreover, this part of PSI() is returned as + a sparse vector by invoking set_feature(). For example, to set the feature + with an index of 55 to the value of 1 this method would call: + set_feature(55); + Or equivalently: + set_feature(55,1); + Therefore, the first argument to set_feature is the index of the feature + to be set while the second argument is the value the feature should take. + Additionally, note that calling set_feature() multiple times with the same + feature index does NOT overwrite the old value, it adds to the previous + value. For example, if you call set_feature(55) 3 times then it will + result in feature 55 having a value of 3. + - This function only calls set_feature() with feature_index values < num_features() + !*/ + + unsigned long num_nonnegative_weights ( + ) const; + /*! + ensures + - returns the number of elements of the w parameter vector which should be + non-negative. That is, this feature extractor is intended to be used + with w vectors where the first num_nonnegative_weights() elements of w + are >= 0. That is, it should be the case that w(i) >= 0 for all i < + num_nonnegative_weights(). + - Note that num_nonnegative_weights() is just an optional method to allow + you to tell a tool like the structural_sequence_labeling_trainer that the + learned w should have a certain number of non-negative elements. 
+ Therefore, if you do not provide a num_nonnegative_weights() method in + your feature extractor then it will default to a value of 0, indicating + that all elements of the w parameter vector may be any value. + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + void serialize( + const example_feature_extractor& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + void deserialize( + example_feature_extractor& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + bool contains_invalid_labeling ( + const feature_extractor& fe, + const typename feature_extractor::sequence_type& x, + const std::vector<unsigned long>& y + ); + /*! + requires + - feature_extractor must be an object that implements an interface compatible + with the example_feature_extractor discussed above. + ensures + - if (x.size() != y.size() || + fe.reject_labeling() rejects any of the labels in y) then + - returns true + - else + - returns false + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + bool contains_invalid_labeling ( + const feature_extractor& fe, + const std::vector<typename feature_extractor::sequence_type>& x, + const std::vector<std::vector<unsigned long> >& y + ); + /*! + requires + - feature_extractor must be an object that implements an interface compatible + with the example_feature_extractor discussed above. 
+ ensures + - if (x.size() != y.size() || + contains_invalid_labeling(fe,x[i],y[i]) == true for some i ) then + - returns true + - else + - returns false + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class sequence_labeler + { + /*! + REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor discussed above. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for doing sequence labeling. In particular, it is + capable of representing sequence labeling models such as those produced by + Hidden Markov SVMs or Chain Structured Conditional Random fields. See the + following papers for an introduction to these techniques: + - Hidden Markov Support Vector Machines by + Y. Altun, I. Tsochantaridis, T. Hofmann + - Shallow Parsing with Conditional Random Fields by + Fei Sha and Fernando Pereira + + + The model used by this object is the following. Given + an input sequence x, predict an output label sequence y + such that: + y == argmax_Y dot(get_weights(), PSI(x,Y)) + Where PSI() is defined by the feature_extractor template + argument. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const members of this object from multiple + threads so long as the feature_extractor is also threadsafe. This is + because the const members are purely read-only operations. However, + any operation that modifies a sequence_labeler is not threadsafe. + !*/ + + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<unsigned long> labeled_sequence_type; + + sequence_labeler( + ); + /*! 
+ ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights().size() == #get_feature_extractor().num_features() + - #get_weights() == 0 + !*/ + + explicit sequence_labeler( + const matrix<double,0,1>& weights + ); + /*! + requires + - feature_extractor().num_features() == weights.size() + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights() == weights + !*/ + + sequence_labeler( + const matrix<double,0,1>& weights, + const feature_extractor& fe + ); + /*! + requires + - fe.num_features() == weights.size() + ensures + - #get_feature_extractor() == fe + - #get_weights() == weights + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object + !*/ + + const matrix<double,0,1>& get_weights ( + ) const; + /*! + ensures + - returns the parameter vector associated with this sequence labeler. + The length of the vector is get_feature_extractor().num_features(). + !*/ + + unsigned long num_labels ( + ) const; + /*! + ensures + - returns get_feature_extractor().num_labels() + (i.e. returns the number of possible output labels for each + element of a sequence) + !*/ + + labeled_sequence_type operator() ( + const sample_sequence_type& x + ) const; + /*! + requires + - num_labels() > 0 + ensures + - returns a vector Y of label values such that: + - Y.size() == x.size() + - for all valid i: + - Y[i] == the predicted label for x[i] + - 0 <= Y[i] < num_labels() + !*/ + + void label_sequence ( + const sample_sequence_type& x, + labeled_sequence_type& y + ) const; + /*! + requires + - num_labels() > 0 + ensures + - #y == (*this)(x) + (i.e. This is just another interface to the operator() routine + above. 
This one avoids returning the results by value and therefore + might be a little faster in some cases) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void serialize ( + const sequence_labeler<feature_extractor>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void deserialize ( + sequence_labeler<feature_extractor>& item, + std::istream& in + ); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SEQUENCE_LAbELER_ABSTRACT_H_h_ + + diff --git a/ml/dlib/dlib/svm/sequence_segmenter.h b/ml/dlib/dlib/svm/sequence_segmenter.h new file mode 100644 index 000000000..237023efa --- /dev/null +++ b/ml/dlib/dlib/svm/sequence_segmenter.h @@ -0,0 +1,468 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SEQUENCE_SeGMENTER_H_h_ +#define DLIB_SEQUENCE_SeGMENTER_H_h_ + +#include "sequence_segmenter_abstract.h" +#include "../matrix.h" +#include "sequence_labeler.h" +#include <vector> + +namespace dlib +{ + // This namespace contains implementation details for the sequence_segmenter. + namespace impl_ss + { + + // ------------------------------------------------------------------------------------ + + // BIO/BILOU labels + const unsigned int BEGIN = 0; + const unsigned int INSIDE = 1; + const unsigned int OUTSIDE = 2; + const unsigned int LAST = 3; + const unsigned int UNIT = 4; + + + // ------------------------------------------------------------------------------------ + + template <typename ss_feature_extractor> + class feature_extractor + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This is a feature extractor for a sequence_labeler. It serves to map + the interface defined by a sequence_labeler into the kind of interface + defined for a sequence_segmenter. + !*/ + + public: + typedef typename ss_feature_extractor::sequence_type sequence_type; + + ss_feature_extractor fe; + + feature_extractor() {} + feature_extractor(const ss_feature_extractor& ss_fe_) : fe(ss_fe_) {} + + unsigned long num_nonnegative_weights ( + ) const + { + const unsigned long NL = ss_feature_extractor::use_BIO_model ? 3 : 5; + if (ss_feature_extractor::allow_negative_weights) + { + return 0; + } + else + { + // We make everything non-negative except for the label transition + // and bias features. + return num_features() - NL*NL - NL; + } + } + + friend void serialize(const feature_extractor& item, std::ostream& out) + { + serialize(item.fe, out); + } + + friend void deserialize(feature_extractor& item, std::istream& in) + { + deserialize(item.fe, in); + } + + unsigned long num_features() const + { + const unsigned long NL = ss_feature_extractor::use_BIO_model ? 3 : 5; + if (ss_feature_extractor::use_high_order_features) + return NL + NL*NL + (NL*NL+NL)*fe.num_features()*fe.window_size(); + else + return NL + NL*NL + NL*fe.num_features()*fe.window_size(); + } + + unsigned long order() const + { + return 1; + } + + unsigned long num_labels() const + { + if (ss_feature_extractor::use_BIO_model) + return 3; + else + return 5; + } + + private: + + template <typename feature_setter> + struct dot_functor + { + /*! + WHAT THIS OBJECT REPRESENTS + This class wraps the feature_setter used by a sequence_labeler + and turns it into the kind needed by a sequence_segmenter. 
+ !*/ + + dot_functor(feature_setter& set_feature_, unsigned long offset_) : + set_feature(set_feature_), offset(offset_) {} + + feature_setter& set_feature; + unsigned long offset; + + inline void operator() ( + unsigned long feat_index + ) + { + set_feature(offset+feat_index); + } + + inline void operator() ( + unsigned long feat_index, + double feat_value + ) + { + set_feature(offset+feat_index, feat_value); + } + }; + + public: + + template <typename EXP> + bool reject_labeling ( + const sequence_type& x, + const matrix_exp<EXP>& y, + unsigned long pos + ) const + { + if (ss_feature_extractor::use_BIO_model) + { + // Don't allow BIO label patterns that don't correspond to a sensical + // segmentation. + if (y.size() > 1 && y(0) == INSIDE && y(1) == OUTSIDE) + return true; + if (y.size() == 1 && y(0) == INSIDE) + return true; + } + else + { + // Don't allow BILOU label patterns that don't correspond to a sensical + // segmentation. + if (y.size() > 1) + { + if (y(1) == BEGIN && y(0) == OUTSIDE) + return true; + if (y(1) == BEGIN && y(0) == UNIT) + return true; + if (y(1) == BEGIN && y(0) == BEGIN) + return true; + + if (y(1) == INSIDE && y(0) == BEGIN) + return true; + if (y(1) == INSIDE && y(0) == OUTSIDE) + return true; + if (y(1) == INSIDE && y(0) == UNIT) + return true; + + if (y(1) == OUTSIDE && y(0) == INSIDE) + return true; + if (y(1) == OUTSIDE && y(0) == LAST) + return true; + + if (y(1) == LAST && y(0) == INSIDE) + return true; + if (y(1) == LAST && y(0) == LAST) + return true; + + if (y(1) == UNIT && y(0) == INSIDE) + return true; + if (y(1) == UNIT && y(0) == LAST) + return true; + + // if at the end of the sequence + if (pos == x.size()-1) + { + if (y(0) == BEGIN) + return true; + if (y(0) == INSIDE) + return true; + } + } + else + { + if (y(0) == INSIDE) + return true; + if (y(0) == LAST) + return true; + + // if at the end of the sequence + if (pos == x.size()-1) + { + if (y(0) == BEGIN) + return true; + } + } + } + return false; + } + + template 
<typename feature_setter, typename EXP> + void get_features ( + feature_setter& set_feature, + const sequence_type& x, + const matrix_exp<EXP>& y, + unsigned long position + ) const + { + unsigned long offset = 0; + + const int window_size = fe.window_size(); + + const int base_dims = fe.num_features(); + for (int i = 0; i < window_size; ++i) + { + const long pos = i-window_size/2 + static_cast<long>(position); + if (0 <= pos && pos < (long)x.size()) + { + const unsigned long off1 = y(0)*base_dims; + dot_functor<feature_setter> fs1(set_feature, offset+off1); + fe.get_features(fs1, x, pos); + + if (ss_feature_extractor::use_high_order_features && y.size() > 1) + { + const unsigned long off2 = num_labels()*base_dims + (y(0)*num_labels()+y(1))*base_dims; + dot_functor<feature_setter> fs2(set_feature, offset+off2); + fe.get_features(fs2, x, pos); + } + } + + if (ss_feature_extractor::use_high_order_features) + offset += num_labels()*base_dims + num_labels()*num_labels()*base_dims; + else + offset += num_labels()*base_dims; + } + + // Pull out an indicator feature for the type of transition between the + // previous label and the current label. + if (y.size() > 1) + set_feature(offset + y(1)*num_labels() + y(0)); + + offset += num_labels()*num_labels(); + // pull out an indicator feature for the current label. This is the per + // label bias. + set_feature(offset + y(0)); + } + }; + + } // end namespace impl_ss + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + unsigned long total_feature_vector_size ( + const feature_extractor& fe + ) + { + const unsigned long NL = feature_extractor::use_BIO_model ? 
3 : 5; + if (feature_extractor::use_high_order_features) + return NL + NL*NL + (NL*NL+NL)*fe.num_features()*fe.window_size(); + else + return NL + NL*NL + NL*fe.num_features()*fe.window_size(); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class sequence_segmenter + { + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; + + + sequence_segmenter() + { +#ifdef ENABLE_ASSERTS + const feature_extractor& fe = labeler.get_feature_extractor().fe; + DLIB_ASSERT(fe.window_size() >= 1 && fe.num_features() >= 1, + "\t sequence_segmenter::sequence_segmenter()" + << "\n\t An invalid feature extractor was supplied." + << "\n\t fe.window_size(): " << fe.window_size() + << "\n\t fe.num_features(): " << fe.num_features() + << "\n\t this: " << this + ); +#endif + } + + explicit sequence_segmenter( + const matrix<double,0,1>& weights + ) : + labeler(weights) + { +#ifdef ENABLE_ASSERTS + const feature_extractor& fe = labeler.get_feature_extractor().fe; + // make sure requires clause is not broken + DLIB_ASSERT(total_feature_vector_size(fe) == (unsigned long)weights.size(), + "\t sequence_segmenter::sequence_segmenter(weights)" + << "\n\t These sizes should match" + << "\n\t total_feature_vector_size(fe): " << total_feature_vector_size(fe) + << "\n\t weights.size(): " << weights.size() + << "\n\t this: " << this + ); + DLIB_ASSERT(fe.window_size() >= 1 && fe.num_features() >= 1, + "\t sequence_segmenter::sequence_segmenter()" + << "\n\t An invalid feature extractor was supplied." 
+ << "\n\t fe.window_size(): " << fe.window_size() + << "\n\t fe.num_features(): " << fe.num_features() + << "\n\t this: " << this + ); +#endif + } + + sequence_segmenter( + const matrix<double,0,1>& weights, + const feature_extractor& fe + ) : + labeler(weights, impl_ss::feature_extractor<feature_extractor>(fe)) + { + // make sure requires clause is not broken + DLIB_ASSERT(total_feature_vector_size(fe) == (unsigned long)weights.size(), + "\t sequence_segmenter::sequence_segmenter(weights,fe)" + << "\n\t These sizes should match" + << "\n\t total_feature_vector_size(fe): " << total_feature_vector_size(fe) + << "\n\t weights.size(): " << weights.size() + << "\n\t this: " << this + ); + DLIB_ASSERT(fe.window_size() >= 1 && fe.num_features() >= 1, + "\t sequence_segmenter::sequence_segmenter()" + << "\n\t An invalid feature extractor was supplied." + << "\n\t fe.window_size(): " << fe.window_size() + << "\n\t fe.num_features(): " << fe.num_features() + << "\n\t this: " << this + ); + } + + const feature_extractor& get_feature_extractor ( + ) const { return labeler.get_feature_extractor().fe; } + + const matrix<double,0,1>& get_weights ( + ) const { return labeler.get_weights(); } + + segmented_sequence_type operator() ( + const sample_sequence_type& x + ) const + { + segmented_sequence_type y; + segment_sequence(x,y); + return y; + } + + void segment_sequence ( + const sample_sequence_type& x, + segmented_sequence_type& y + ) const + { + y.clear(); + std::vector<unsigned long> labels; + labeler.label_sequence(x, labels); + + if (feature_extractor::use_BIO_model) + { + // Convert from BIO tagging to the explicit segments representation. 
+ for (unsigned long i = 0; i < labels.size(); ++i) + { + if (labels[i] == impl_ss::BEGIN) + { + const unsigned long begin = i; + ++i; + while (i < labels.size() && labels[i] == impl_ss::INSIDE) + ++i; + + y.push_back(std::make_pair(begin, i)); + --i; + } + } + } + else + { + // Convert from BILOU tagging to the explicit segments representation. + for (unsigned long i = 0; i < labels.size(); ++i) + { + if (labels[i] == impl_ss::BEGIN) + { + const unsigned long begin = i; + ++i; + while (i < labels.size() && labels[i] == impl_ss::INSIDE) + ++i; + + y.push_back(std::make_pair(begin, i+1)); + } + else if (labels[i] == impl_ss::UNIT) + { + y.push_back(std::make_pair(i, i+1)); + } + } + } + } + + friend void serialize(const sequence_segmenter& item, std::ostream& out) + { + int version = 1; + serialize(version, out); + + // Save these just so we can compare them when we deserialize and make + // sure the feature_extractor being used is compatible with the model being + // loaded. + serialize(feature_extractor::use_BIO_model, out); + serialize(feature_extractor::use_high_order_features, out); + serialize(total_feature_vector_size(item.get_feature_extractor()), out); + + serialize(item.labeler, out); + } + + friend void deserialize(sequence_segmenter& item, std::istream& in) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw serialization_error("Unexpected version found while deserializing dlib::sequence_segmenter."); + + // Try to check if the saved model is compatible with the current feature + // extractor. + bool use_BIO_model, use_high_order_features; + unsigned long dims; + deserialize(use_BIO_model, in); + deserialize(use_high_order_features, in); + deserialize(dims, in); + deserialize(item.labeler, in); + if (use_BIO_model != feature_extractor::use_BIO_model) + { + throw serialization_error("Incompatible feature extractor found while deserializing " + "dlib::sequence_segmenter. 
Wrong value of use_BIO_model."); + } + if (use_high_order_features != feature_extractor::use_high_order_features) + { + throw serialization_error("Incompatible feature extractor found while deserializing " + "dlib::sequence_segmenter. Wrong value of use_high_order_features."); + } + if (dims != total_feature_vector_size(item.get_feature_extractor())) + { + throw serialization_error("Incompatible feature extractor found while deserializing " + "dlib::sequence_segmenter. Wrong value of total_feature_vector_size()."); + } + } + + private: + sequence_labeler<impl_ss::feature_extractor<feature_extractor> > labeler; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SEQUENCE_SeGMENTER_H_h_ + + diff --git a/ml/dlib/dlib/svm/sequence_segmenter_abstract.h b/ml/dlib/dlib/svm/sequence_segmenter_abstract.h new file mode 100644 index 000000000..7229fee22 --- /dev/null +++ b/ml/dlib/dlib/svm/sequence_segmenter_abstract.h @@ -0,0 +1,452 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SEQUENCE_SeGMENTER_ABSTRACT_H_h_ +#ifdef DLIB_SEQUENCE_SeGMENTER_ABSTRACT_H_h_ + +#include "../matrix.h" +#include <vector> +#include "sequence_labeler_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class example_feature_extractor + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the interface a feature extractor must implement if it + is to be used with the sequence_segmenter defined at the bottom of this + file. + + The model used by sequence_segmenter objects is the following. Given an + input sequence x, predict an output label sequence y such that: + y == argmax_Y dot(w, PSI(x,Y)) + Where w is a parameter vector and the label sequence defines a segmentation + of x. 
+ + Recall that a sequence_segmenter uses the BIO or BILOU tagging model and is + also an instantiation of the dlib::sequence_labeler. Selecting to use the + BIO model means that each element of the label sequence y takes on one of + three possible values (B, I, or O) and together these labels define a + segmentation of the sequence. For example, to represent a segmentation of + the sequence of words "The dog ran to Bob Jones" where only "Bob Jones" was + segmented out we would use the label sequence OOOOBI. The BILOU model is + similar except that it uses five different labels and each segment is + labeled as U, BL, BIL, BIIL, BIIIL, and so on depending on its length. + Therefore, the BILOU model is able to more explicitly model the ends of the + segments than the BIO model, but has more parameters to estimate. + + Keeping all this in mind, the purpose of a sequence_segmenter is to take + care of the bookkeeping associated with creating BIO/BILOU tagging models + for segmentation tasks. In particular, it presents the user with a + simplified version of the interface used by the dlib::sequence_labeler. It + does this by completely hiding the BIO/BILOU tags from the user and instead + exposes an explicit sub-segment based labeling representation. It also + simplifies the construction of the PSI() feature vector. + + Like in the dlib::sequence_labeler, PSI() is a sum of feature vectors, each + derived from the entire input sequence x but only part of the label + sequence y. In the case of the sequence_segmenter, we use an order one + Markov model. This means that + PSI(x,y) == sum_i XI(x, y_{i-1}, y_{i}, i) + where the sum is taken over all the elements in the sequence. At each + element we extract a feature vector, XI(), that is expected to encode + important details describing what the i-th position of the sequence looks + like in the context of the current and previous labels. 
To do this, XI() + is allowed to look at any part of the input sequence x, the current and + previous labels, and of course it must also know the position in question, i. + + The sequence_segmenter simplifies this further by decomposing XI() into + components which model the current window around each position as well as + the conjunction of the current window around each position and the previous + label. In particular, the sequence_segmenter only asks a user to provide a + single feature vector which characterizes a position of the sequence + independent of any labeling. We denote this feature vector by ZI(x,i), where + x is the sequence and i is the position in question. + + For example, suppose we use a window size of 3 and BIO tags, then we can + put all this together and define XI() in terms of ZI(). To do this, we can + think of XI() as containing 12*3 slots which contain either a zero vector + or a ZI() vector. Each combination of window position and labeling has a + different slot. To explain further, consider the following examples where + we have annotated which parts of XI() correspond to each slot. 
+ + If the previous and current label are both B and we use a window size of 3 + then XI() would be instantiated as: + XI(x, B, B, i) = [ZI(x,i-1) \ + ZI(x,i) > If current label is B + ZI(x,i+1) / + 0 \ + 0 > If current label is I + 0 / + 0 \ + 0 > If current label is O + 0 / + + ZI(x,i-1) \ + ZI(x,i) > If previous label is B and current label is B + ZI(x,i+1) / + 0 \ + 0 > If previous label is B and current label is I + 0 / + 0 \ + 0 > If previous label is B and current label is O + 0 / + + 0 \ + 0 > If previous label is I and current label is B + 0 / + 0 \ + 0 > If previous label is I and current label is I + 0 / + 0 \ + 0 > If previous label is I and current label is O + 0 / + + 0 \ + 0 > If previous label is O and current label is B + 0 / + 0 \ + 0 > If previous label is O and current label is I + 0 / + 0 \ + 0 > If previous label is O and current label is O + 0] / + + + If the previous label is I and the current label is O and we use a window + size of 3 then XI() would be instantiated as: + XI(x, I, O, i) = [0 \ + 0 > If current label is B + 0 / + 0 \ + 0 > If current label is I + 0 / + ZI(x,i-1) \ + ZI(x,i) > If current label is O + ZI(x,i+1) / + + 0 \ + 0 > If previous label is B and current label is B + 0 / + 0 \ + 0 > If previous label is B and current label is I + 0 / + 0 \ + 0 > If previous label is B and current label is O + 0 / + + 0 \ + 0 > If previous label is I and current label is B + 0 / + 0 \ + 0 > If previous label is I and current label is I + 0 / + ZI(x,i-1) \ + ZI(x,i) > If previous label is I and current label is O + ZI(x,i+1) / + + 0 \ + 0 > If previous label is O and current label is B + 0 / + 0 \ + 0 > If previous label is O and current label is I + 0 / + 0 \ + 0 > If previous label is O and current label is O + 0] / + + If we had instead used the BILOU tagging model the XI() vector would + have been similarly defined except that there would be 30*3 slots for + the various label combination instead of 12*3. 
+ + Finally, while not shown here, we also include indicator features in + XI() to model label transitions and individual label biases. These are + 12 extra features in the case of the BIO tagging model and 30 extra in + the case of the BILOU tagging model. + + THREAD SAFETY + Instances of this object are required to be threadsafe, that is, it should + be safe for multiple threads to make concurrent calls to the member + functions of this object. + !*/ + + public: + // This should be the type used to represent an input sequence. It can be + // anything so long as it has a .size() which returns the length of the sequence. + typedef the_type_used_to_represent_a_sequence sequence_type; + + // If you want to use the BIO tagging model then set this bool to true. Set it to + // false to use the BILOU tagging model. + const static bool use_BIO_model = true; + + // In the WHAT THIS OBJECT REPRESENTS section above we discussed how we model the + // conjunction of the previous label and the window around each position. Doing + // this greatly expands the size of the parameter vector w. You can optionally + // disable these higher order features by setting the use_high_order_features bool + // to false. This will cause XI() to include only slots which are independent of + // the previous label. + const static bool use_high_order_features = true; + + // You use a tool like the structural_sequence_segmentation_trainer to learn the weight + // vector needed by a sequence_segmenter. You can tell the trainer to force all the + // elements of the weight vector corresponding to ZI() to be non-negative. This is all + // the elements of w except for the elements corresponding to the label transition and + // bias indicator features. To do this, just set allow_negative_weights to false. + const static bool allow_negative_weights = true; + + + example_feature_extractor ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + unsigned long num_features( + ) const; + /*! 
+ ensures + - returns the dimensionality of the ZI() feature vector. This number is + always >= 1 + !*/ + + unsigned long window_size( + ) const; + /*! + ensures + - returns the size of the window ZI() vectors are extracted from. This + number is always >= 1. + !*/ + + template <typename feature_setter> + void get_features ( + feature_setter& set_feature, + const sequence_type& x, + unsigned long position + ) const; + /*! + requires + - position < x.size() + - set_feature is a function object which allows expressions of the form: + - set_features((unsigned long)feature_index, (double)feature_value); + - set_features((unsigned long)feature_index); + ensures + - This function computes the ZI(x,position) feature vector. This is a + feature vector which should capture the properties of x[position] that + are informative relative to the sequence segmentation task you are trying + to perform. + - ZI(x,position) is returned as a sparse vector by invoking set_feature(). + For example, to set the feature with an index of 55 to the value of 1 + this method would call: + set_feature(55); + Or equivalently: + set_feature(55,1); + Therefore, the first argument to set_feature is the index of the feature + to be set while the second argument is the value the feature should take. + Additionally, note that calling set_feature() multiple times with the + same feature index does NOT overwrite the old value, it adds to the + previous value. For example, if you call set_feature(55) 3 times then it + will result in feature 55 having a value of 3. + - This function only calls set_feature() with feature_index values < num_features() + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + void serialize( + const example_feature_extractor& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + + void deserialize( + example_feature_extractor& item, + std::istream& in + ); + /*! 
+ provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + unsigned long total_feature_vector_size ( + const feature_extractor& fe + ); + /*! + requires + - fe must be an object that implements an interface compatible with the + example_feature_extractor discussed above. + ensures + - returns the dimensionality of the PSI() vector defined by the given feature + extractor. + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class sequence_segmenter + { + /*! + REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor discussed above. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for segmenting a sequence of objects into a set of + non-overlapping chunks. An example sequence segmentation task is to take + English sentences and identify all the named entities. In this example, + you would be using a sequence_segmenter to find all the chunks of + contiguous words which refer to proper names. + + Internally, the sequence_segmenter uses the BIO (Begin, Inside, Outside) or + BILOU (Begin, Inside, Last, Outside, Unit) sequence tagging model. + Moreover, it is implemented using a dlib::sequence_labeler object and + therefore sequence_segmenter objects are examples of chain structured + conditional random field style sequence taggers. + + THREAD SAFETY + It is always safe to use distinct instances of this object in different + threads. 
However, when a single instance is shared between threads then + the following rules apply: + It is safe to call the const members of this object from multiple + threads so long as the feature_extractor is also threadsafe. This is + because the const members are purely read-only operations. However, + any operation that modifies a sequence_segmenter is not threadsafe. + !*/ + + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; + + sequence_segmenter( + ); + /*! + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights().size() == total_feature_vector_size(#get_feature_extractor()) + - #get_weights() == 0 + !*/ + + explicit sequence_segmenter( + const matrix<double,0,1>& weights + ); + /*! + requires + - total_feature_vector_size(feature_extractor()) == weights.size() + ensures + - #get_feature_extractor() == feature_extractor() + (i.e. it will have its default value) + - #get_weights() == weights + !*/ + + sequence_segmenter( + const matrix<double,0,1>& weights, + const feature_extractor& fe + ); + /*! + requires + - total_feature_vector_size(fe) == weights.size() + ensures + - #get_feature_extractor() == fe + - #get_weights() == weights + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object. + !*/ + + const matrix<double,0,1>& get_weights ( + ) const; + /*! + ensures + - returns the parameter vector associated with this sequence segmenter. + The length of the vector is total_feature_vector_size(get_feature_extractor()). + !*/ + + segmented_sequence_type operator() ( + const sample_sequence_type& x + ) const; + /*! + ensures + - Takes an input sequence and returns a list of detected segments within + that sequence. + - None of the returned segments will overlap. 
+ - The returned segments are listed in the order they appeared in the input sequence. + - To be precise, this function returns a std::vector Y of segments such that: + - Y.size() == the number of segments detected in the input sequence x. + - for all valid i: + - Y[i].first == the start of the i-th segment. + - Y[i].second == one past the end of the i-th segment. + - Therefore, the i-th detected segment in x is composed of the elements + x[Y[i].first], x[Y[i].first+1], ..., x[Y[i].second-1] + - Y[i].first < x.size() + - Y[i].second <= x.size() + - Y[i].first < Y[i].second + (i.e. This function never outputs empty segments) + - Y[i].second <= Y[i+1].first + (i.e. the segments are listed in order of appearance and do not overlap) + !*/ + + void segment_sequence ( + const sample_sequence_type& x, + segmented_sequence_type& y + ) const; + /*! + ensures + - #y == (*this)(x) + (i.e. This is just another interface to the operator() routine + above. This one avoids returning the results by value and therefore + might be a little faster in some cases) + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void serialize ( + const sequence_segmenter<feature_extractor>& item, + std::ostream& out + ); + /*! + provides serialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + void deserialize ( + sequence_segmenter<feature_extractor>& item, + std::istream& in + ); + /*! 
+ provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SEQUENCE_SeGMENTER_ABSTRACT_H_h_ + diff --git a/ml/dlib/dlib/svm/simplify_linear_decision_function.h b/ml/dlib/dlib/svm/simplify_linear_decision_function.h new file mode 100644 index 000000000..4f5bef6f3 --- /dev/null +++ b/ml/dlib/dlib/svm/simplify_linear_decision_function.h @@ -0,0 +1,110 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_Hh_ +#define DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_Hh_ + +#include "simplify_linear_decision_function_abstract.h" +#include "../algs.h" +#include "function.h" +#include "sparse_kernel.h" +#include "kernel.h" +#include <map> +#include <vector> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + decision_function<sparse_linear_kernel<T> > simplify_linear_decision_function ( + const decision_function<sparse_linear_kernel<T> >& df + ) + { + // don't do anything if we don't have to + if (df.basis_vectors.size() <= 1) + return df; + + decision_function<sparse_linear_kernel<T> > new_df; + + new_df.b = df.b; + new_df.basis_vectors.set_size(1); + new_df.alpha.set_size(1); + new_df.alpha(0) = 1; + + // now compute the weighted sum of all the sparse basis_vectors in df + typedef typename T::value_type pair_type; + typedef typename pair_type::first_type key_type; + typedef typename pair_type::second_type value_type; + std::map<key_type, value_type> accum; + for (long i = 0; i < df.basis_vectors.size(); ++i) + { + typename T::const_iterator j = df.basis_vectors(i).begin(); + const typename T::const_iterator end = df.basis_vectors(i).end(); + for (; j != end; ++j) + { + accum[j->first] += df.alpha(i) * (j->second); + } + } + + new_df.basis_vectors(0) = 
T(accum.begin(), accum.end()); + + return new_df; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + decision_function<linear_kernel<T> > simplify_linear_decision_function ( + const decision_function<linear_kernel<T> >& df + ) + { + // don't do anything if we don't have to + if (df.basis_vectors.size() <= 1) + return df; + + decision_function<linear_kernel<T> > new_df; + + new_df.b = df.b; + new_df.basis_vectors.set_size(1); + new_df.alpha.set_size(1); + new_df.alpha(0) = 1; + + // now compute the weighted sum of all the basis_vectors in df + new_df.basis_vectors(0) = 0; + for (long i = 0; i < df.basis_vectors.size(); ++i) + { + new_df.basis_vectors(0) += df.alpha(i) * df.basis_vectors(i); + } + + return new_df; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + decision_function<linear_kernel<T> > simplify_linear_decision_function ( + const normalized_function<decision_function<linear_kernel<T> >, vector_normalizer<T> >& df + ) + { + decision_function<linear_kernel<T> > new_df = simplify_linear_decision_function(df.function); + + // now incorporate the normalization stuff into new_df + new_df.basis_vectors(0) = pointwise_multiply(new_df.basis_vectors(0), df.normalizer.std_devs()); + new_df.b += dot(new_df.basis_vectors(0), df.normalizer.means()); + + return new_df; + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_Hh_ + diff --git a/ml/dlib/dlib/svm/simplify_linear_decision_function_abstract.h b/ml/dlib/dlib/svm/simplify_linear_decision_function_abstract.h new file mode 100644 index 000000000..cff8ae11f --- /dev/null +++ b/ml/dlib/dlib/svm/simplify_linear_decision_function_abstract.h @@ -0,0 +1,74 @@ +// Copyright (C) 2010 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_ABSTRACT_Hh_ +#ifdef DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_ABSTRACT_Hh_ + +#include "../algs.h" +#include "function_abstract.h" +#include "sparse_kernel_abstract.h" +#include "kernel_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + decision_function<sparse_linear_kernel<T> > simplify_linear_decision_function ( + const decision_function<sparse_linear_kernel<T> >& df + ); + /*! + requires + - T must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h + ensures + - returns a simplified version of df that only has one basis vector. That + is, returns a decision function D such that: + - D.basis_vectors.size() == 1 (or 0 if df is empty) + - for all possible x: D(x) == df(x) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + decision_function<linear_kernel<T> > simplify_linear_decision_function ( + const decision_function<linear_kernel<T> >& df + ); + /*! + requires + - T must be a dlib::matrix object + ensures + - returns a simplified version of df that only has one basis vector. That + is, returns a decision function D such that: + - D.basis_vectors.size() == 1 (or 0 if df is empty) + - for all possible x: D(x) == df(x) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + decision_function<linear_kernel<T> > simplify_linear_decision_function ( + const normalized_function<decision_function<linear_kernel<T> >, vector_normalizer<T> >& df + ); + /*! + requires + - T must be a dlib::matrix object + ensures + - returns a simplified version of df that only has one basis vector and + doesn't involve an explicit vector_normalizer. 
That is, returns a + decision function D such that: + - D.basis_vectors.size() == 1 (or 0 if df is empty) + - for all possible x: D(x) == df(x) + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SIMPLIFY_LINEAR_DECiSION_FUNCTION_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/sort_basis_vectors.h b/ml/dlib/dlib/svm/sort_basis_vectors.h new file mode 100644 index 000000000..1d4605b41 --- /dev/null +++ b/ml/dlib/dlib/svm/sort_basis_vectors.h @@ -0,0 +1,224 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SORT_BASIS_VECTORs_Hh_ +#define DLIB_SORT_BASIS_VECTORs_Hh_ + +#include <vector> + +#include "sort_basis_vectors_abstract.h" +#include "../matrix.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace bs_impl + { + template <typename EXP> + typename EXP::matrix_type invert ( + const matrix_exp<EXP>& m + ) + { + eigenvalue_decomposition<EXP> eig(make_symmetric(m)); + + typedef typename EXP::type scalar_type; + typedef typename EXP::mem_manager_type mm_type; + + matrix<scalar_type,0,1,mm_type> vals = eig.get_real_eigenvalues(); + + const scalar_type max_eig = max(abs(vals)); + const scalar_type thresh = max_eig*std::sqrt(std::numeric_limits<scalar_type>::epsilon()); + + // Since m might be singular or almost singular we need to do something about + // any very small eigenvalues. So here we set the smallest eigenvalues to + // be equal to a large value to make the inversion stable. We can't just set + // them to zero like in a normal pseudo-inverse since we want the resulting + // inverse matrix to be full rank. + for (long i = 0; i < vals.size(); ++i) + { + if (std::abs(vals(i)) < thresh) + vals(i) = max_eig; + } + + // Build the inverse matrix. This is basically a pseudo-inverse. 
+ return make_symmetric(eig.get_pseudo_v()*diagm(reciprocal(vals))*trans(eig.get_pseudo_v())); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename vect1_type, + typename vect2_type, + typename vect3_type + > + const std::vector<typename kernel_type::sample_type> sort_basis_vectors_impl ( + const kernel_type& kern, + const vect1_type& samples, + const vect2_type& labels, + const vect3_type& basis, + double eps + ) + { + DLIB_ASSERT(is_binary_classification_problem(samples, labels) && + 0 < eps && eps <= 1 && + basis.size() > 0, + "\t void sort_basis_vectors()" + << "\n\t Invalid arguments were given to this function." + << "\n\t is_binary_classification_problem(samples, labels): " << is_binary_classification_problem(samples, labels) + << "\n\t basis.size(): " << basis.size() + << "\n\t eps: " << eps + ); + + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::mem_manager_type mm_type; + + typedef matrix<scalar_type,0,1,mm_type> col_matrix; + typedef matrix<scalar_type,0,0,mm_type> gen_matrix; + + col_matrix c1_mean, c2_mean, temp, delta; + + + col_matrix weights; + + running_covariance<gen_matrix> cov; + + // compute the covariance matrix and the means of the two classes. 
+ for (long i = 0; i < samples.size(); ++i) + { + temp = kernel_matrix(kern, basis, samples(i)); + cov.add(temp); + if (labels(i) > 0) + c1_mean += temp; + else + c2_mean += temp; + } + + c1_mean /= sum(labels > 0); + c2_mean /= sum(labels < 0); + + delta = c1_mean - c2_mean; + + gen_matrix cov_inv = bs_impl::invert(cov.covariance()); + + + matrix<long,0,1,mm_type> total_perm = trans(range(0, delta.size()-1)); + matrix<long,0,1,mm_type> perm = total_perm; + + std::vector<std::pair<scalar_type,long> > sorted_feats(delta.size()); + + long best_size = delta.size(); + long misses = 0; + matrix<long,0,1,mm_type> best_total_perm = perm; + + // Now we basically find fisher's linear discriminant over and over. Each + // time sorting the features so that the most important ones pile up together. + weights = trans(chol(cov_inv))*delta; + while (true) + { + + for (unsigned long i = 0; i < sorted_feats.size(); ++i) + sorted_feats[i] = make_pair(std::abs(weights(i)), i); + + std::sort(sorted_feats.begin(), sorted_feats.end()); + + // make a permutation vector according to the sorting + for (long i = 0; i < perm.size(); ++i) + perm(i) = sorted_feats[i].second; + + + // Apply the permutation. Doing this gives the same result as permuting all the + // features and then recomputing the delta and cov_inv from scratch. + cov_inv = subm(cov_inv,perm,perm); + delta = rowm(delta,perm); + + // Record all the permutations we have done so we will know how the final + // weights match up with the original basis vectors when we are done. + total_perm = rowm(total_perm, perm); + + // compute new Fisher weights for sorted features. + weights = trans(chol(cov_inv))*delta; + + // Measure how many features it takes to account for eps% of the weights vector. 
+ const scalar_type total_weight = length_squared(weights); + scalar_type weight_accum = 0; + long size = 0; + // figure out how to get eps% of the weights + for (long i = weights.size()-1; i >= 0; --i) + { + ++size; + weight_accum += weights(i)*weights(i); + if (weight_accum/total_weight > eps) + break; + } + + // loop until the best_size stops dropping + if (size < best_size) + { + misses = 0; + best_size = size; + best_total_perm = total_perm; + } + else + { + ++misses; + + // Give up once we have had 10 rounds where we didn't find a weights vector with + // a smaller concentration of good features. + if (misses >= 10) + break; + } + + } + + // make sure best_size isn't zero + if (best_size == 0) + best_size = 1; + + std::vector<typename kernel_type::sample_type> sorted_basis; + + // permute the basis so that it matches up with the contents of the best weights + sorted_basis.resize(best_size); + for (unsigned long i = 0; i < sorted_basis.size(); ++i) + { + // Note that we load sorted_basis backwards so that the most important + // basis elements come first. 
+ sorted_basis[i] = basis(best_total_perm(basis.size()-i-1)); + } + + return sorted_basis; + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename vect1_type, + typename vect2_type, + typename vect3_type + > + const std::vector<typename kernel_type::sample_type> sort_basis_vectors ( + const kernel_type& kern, + const vect1_type& samples, + const vect2_type& labels, + const vect3_type& basis, + double eps = 0.99 + ) + { + return bs_impl::sort_basis_vectors_impl(kern, + mat(samples), + mat(labels), + mat(basis), + eps); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SORT_BASIS_VECTORs_Hh_ + diff --git a/ml/dlib/dlib/svm/sort_basis_vectors_abstract.h b/ml/dlib/dlib/svm/sort_basis_vectors_abstract.h new file mode 100644 index 000000000..b43dca170 --- /dev/null +++ b/ml/dlib/dlib/svm/sort_basis_vectors_abstract.h @@ -0,0 +1,59 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SORT_BASIS_VECTORs_ABSTRACT_Hh_ +#ifdef DLIB_SORT_BASIS_VECTORs_ABSTRACT_Hh_ + +#include <vector> + +#include "../matrix.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename kernel_type, + typename vect1_type, + typename vect2_type, + typename vect3_type + > + const std::vector<typename kernel_type::sample_type> sort_basis_vectors ( + const kernel_type& kern, + const vect1_type& samples, + const vect2_type& labels, + const vect3_type& basis, + double eps = 0.99 + ); + /*! 
+ requires + - is_binary_classification_problem(samples, labels) + - 0 < eps <= 1 + - basis.size() > 0 + - kernel_type is a kernel function object as defined in dlib/svm/kernel_abstract.h + It must be capable of operating on the elements of samples and basis. + - vect1_type == a matrix or something convertible to a matrix via mat() + - vect2_type == a matrix or something convertible to a matrix via mat() + - vect3_type == a matrix or something convertible to a matrix via mat() + ensures + - A kernel based learning method ultimately needs to select a set of basis functions + represented by a particular choice of kernel and a set of basis vectors. + sort_basis_vectors() attempts to order the elements of basis so that elements which are + most useful in solving the binary classification problem defined by samples and + labels come first. + - In particular, this function returns a std::vector, SB, of sorted basis vectors such that: + - 0 < SB.size() <= basis.size() + - SB will contain elements from basis but they will have been sorted so that + the most useful elements come first (i.e. SB[0] is the most important). + - eps notionally controls how big SB will be. Bigger eps corresponds to a + bigger basis. You can think of it like asking for eps percent of the + discriminating power from the input basis. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SORT_BASIS_VECTORs_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/sparse_kernel.h b/ml/dlib/dlib/svm/sparse_kernel.h new file mode 100644 index 000000000..f571135ec --- /dev/null +++ b/ml/dlib/dlib/svm/sparse_kernel.h @@ -0,0 +1,384 @@ +// Copyright (C) 2009 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SVm_SPARSE_KERNEL +#define DLIB_SVm_SPARSE_KERNEL + +#include "sparse_kernel_abstract.h" +#include <cmath> +#include <limits> +#include "../algs.h" +#include "../serialize.h" +#include "sparse_vector.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct sparse_radial_basis_kernel + { + typedef typename T::value_type::second_type scalar_type; + typedef T sample_type; + typedef default_memory_manager mem_manager_type; + + sparse_radial_basis_kernel(const scalar_type g) : gamma(g) {} + sparse_radial_basis_kernel() : gamma(0.1) {} + sparse_radial_basis_kernel( + const sparse_radial_basis_kernel& k + ) : gamma(k.gamma) {} + + + const scalar_type gamma; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + const scalar_type d = distance_squared(a,b); + return std::exp(-gamma*d); + } + + sparse_radial_basis_kernel& operator= ( + const sparse_radial_basis_kernel& k + ) + { + const_cast<scalar_type&>(gamma) = k.gamma; + return *this; + } + + bool operator== ( + const sparse_radial_basis_kernel& k + ) const + { + return gamma == k.gamma; + } + }; + + template < + typename T + > + void serialize ( + const sparse_radial_basis_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.gamma, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type sparse_radial_basis_kernel"); + } + } + + template < + typename T + > + void deserialize ( + sparse_radial_basis_kernel<T>& item, + std::istream& in + ) + { + typedef typename T::value_type::second_type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.gamma), in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type sparse_radial_basis_kernel"); + } + } + +// 
---------------------------------------------------------------------------------------- + + template < + typename T + > + struct sparse_polynomial_kernel + { + typedef typename T::value_type::second_type scalar_type; + typedef T sample_type; + typedef default_memory_manager mem_manager_type; + + sparse_polynomial_kernel(const scalar_type g, const scalar_type c, const scalar_type d) : gamma(g), coef(c), degree(d) {} + sparse_polynomial_kernel() : gamma(1), coef(0), degree(1) {} + sparse_polynomial_kernel( + const sparse_polynomial_kernel& k + ) : gamma(k.gamma), coef(k.coef), degree(k.degree) {} + + typedef T type; + const scalar_type gamma; + const scalar_type coef; + const scalar_type degree; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return std::pow(gamma*(dot(a,b)) + coef, degree); + } + + sparse_polynomial_kernel& operator= ( + const sparse_polynomial_kernel& k + ) + { + const_cast<scalar_type&>(gamma) = k.gamma; + const_cast<scalar_type&>(coef) = k.coef; + const_cast<scalar_type&>(degree) = k.degree; + return *this; + } + + bool operator== ( + const sparse_polynomial_kernel& k + ) const + { + return (gamma == k.gamma) && (coef == k.coef) && (degree == k.degree); + } + }; + + template < + typename T + > + void serialize ( + const sparse_polynomial_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.gamma, out); + serialize(item.coef, out); + serialize(item.degree, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type sparse_polynomial_kernel"); + } + } + + template < + typename T + > + void deserialize ( + sparse_polynomial_kernel<T>& item, + std::istream& in + ) + { + typedef typename T::value_type::second_type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.gamma), in); + deserialize(const_cast<scalar_type&>(item.coef), in); + deserialize(const_cast<scalar_type&>(item.degree), in); + } + catch 
(serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type sparse_polynomial_kernel"); + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + struct sparse_sigmoid_kernel + { + typedef typename T::value_type::second_type scalar_type; + typedef T sample_type; + typedef default_memory_manager mem_manager_type; + + sparse_sigmoid_kernel(const scalar_type g, const scalar_type c) : gamma(g), coef(c) {} + sparse_sigmoid_kernel() : gamma(0.1), coef(-1.0) {} + sparse_sigmoid_kernel( + const sparse_sigmoid_kernel& k + ) : gamma(k.gamma), coef(k.coef) {} + + typedef T type; + const scalar_type gamma; + const scalar_type coef; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return std::tanh(gamma*(dot(a,b)) + coef); + } + + sparse_sigmoid_kernel& operator= ( + const sparse_sigmoid_kernel& k + ) + { + const_cast<scalar_type&>(gamma) = k.gamma; + const_cast<scalar_type&>(coef) = k.coef; + return *this; + } + + bool operator== ( + const sparse_sigmoid_kernel& k + ) const + { + return (gamma == k.gamma) && (coef == k.coef); + } + }; + + template < + typename T + > + void serialize ( + const sparse_sigmoid_kernel<T>& item, + std::ostream& out + ) + { + try + { + serialize(item.gamma, out); + serialize(item.coef, out); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while serializing object of type sparse_sigmoid_kernel"); + } + } + + template < + typename T + > + void deserialize ( + sparse_sigmoid_kernel<T>& item, + std::istream& in + ) + { + typedef typename T::value_type::second_type scalar_type; + try + { + deserialize(const_cast<scalar_type&>(item.gamma), in); + deserialize(const_cast<scalar_type&>(item.coef), in); + } + catch (serialization_error& e) + { + throw serialization_error(e.info + "\n while deserializing object of type sparse_sigmoid_kernel"); + } + } + +// 
---------------------------------------------------------------------------------------- + + template <typename T> + struct sparse_linear_kernel + { + typedef typename T::value_type::second_type scalar_type; + typedef T sample_type; + typedef default_memory_manager mem_manager_type; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + return dot(a,b); + } + + bool operator== ( + const sparse_linear_kernel& + ) const + { + return true; + } + }; + + template < + typename T + > + void serialize ( + const sparse_linear_kernel<T>& , + std::ostream& + ){} + + template < + typename T + > + void deserialize ( + sparse_linear_kernel<T>& , + std::istream& + ){} + +// ---------------------------------------------------------------------------------------- + + template <typename T> + struct sparse_histogram_intersection_kernel + { + typedef typename T::value_type::second_type scalar_type; + typedef T sample_type; + typedef default_memory_manager mem_manager_type; + + scalar_type operator() ( + const sample_type& a, + const sample_type& b + ) const + { + typename sample_type::const_iterator ai = a.begin(); + typename sample_type::const_iterator bi = b.begin(); + + scalar_type sum = 0; + while (ai != a.end() && bi != b.end()) + { + if (ai->first == bi->first) + { + sum += std::min(ai->second , bi->second); + ++ai; + ++bi; + } + else if (ai->first < bi->first) + { + ++ai; + } + else + { + ++bi; + } + } + + return sum; + } + + bool operator== ( + const sparse_histogram_intersection_kernel& + ) const + { + return true; + } + }; + + template < + typename T + > + void serialize ( + const sparse_histogram_intersection_kernel<T>& , + std::ostream& + ){} + + template < + typename T + > + void deserialize ( + sparse_histogram_intersection_kernel<T>& , + std::istream& + ){} + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_SPARSE_KERNEL + + + diff --git 
// Copyright (C) 2009  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_SVm_SPARSE_KERNEL_ABSTRACT_
#ifdef DLIB_SVm_SPARSE_KERNEL_ABSTRACT_

#include <cmath>
#include <limits>
#include "../algs.h"
#include "../serialize.h"
#include "kernel_abstract.h"
#include "sparse_vector_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <typename T>
    struct sparse_radial_basis_kernel
    {
        /*!
            REQUIREMENTS ON T
                Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object represents a radial basis function kernel that works
                with sparse vectors.

            THREAD SAFETY
                This kernel is threadsafe.
        !*/

        typedef typename T::value_type::second_type scalar_type;
        typedef T sample_type;
        typedef default_memory_manager mem_manager_type;

        const scalar_type gamma;

        sparse_radial_basis_kernel(
        );
        /*!
            ensures
                - #gamma == 0.1
        !*/

        sparse_radial_basis_kernel(
            const sparse_radial_basis_kernel& k
        );
        /*!
            ensures
                - #gamma == k.gamma
        !*/

        sparse_radial_basis_kernel(
            const scalar_type g
        );
        /*!
            ensures
                - #gamma == g
        !*/

        scalar_type operator() (
            const sample_type& a,
            const sample_type& b
        ) const;
        /*!
            requires
                - a is a sparse vector
                - b is a sparse vector
            ensures
                - returns exp(-gamma * distance_squared(a,b))
        !*/

        sparse_radial_basis_kernel& operator= (
            const sparse_radial_basis_kernel& k
        );
        /*!
            ensures
                - #gamma = k.gamma
                - returns *this
        !*/

        bool operator== (
            const sparse_radial_basis_kernel& k
        ) const;
        /*!
            ensures
                - if (k and *this are identical) then
                    - returns true
                - else
                    - returns false
        !*/

    };

    template <typename T>
    void serialize (
        const sparse_radial_basis_kernel<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for sparse_radial_basis_kernel
    !*/

    template <typename T>
    void deserialize (
        sparse_radial_basis_kernel<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support for sparse_radial_basis_kernel
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    struct sparse_sigmoid_kernel
    {
        /*!
            REQUIREMENTS ON T
                Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object represents a sigmoid kernel that works with sparse
                vectors.

            THREAD SAFETY
                This kernel is threadsafe.
        !*/

        typedef typename T::value_type::second_type scalar_type;
        typedef T sample_type;
        typedef default_memory_manager mem_manager_type;

        const scalar_type gamma;
        const scalar_type coef;

        sparse_sigmoid_kernel(
        );
        /*!
            ensures
                - #gamma == 0.1
                - #coef == -1.0
        !*/

        sparse_sigmoid_kernel(
            const sparse_sigmoid_kernel& k
        );
        /*!
            ensures
                - #gamma == k.gamma
                - #coef == k.coef
        !*/

        sparse_sigmoid_kernel(
            const scalar_type g,
            const scalar_type c
        );
        /*!
            ensures
                - #gamma == g
                - #coef == c
        !*/

        scalar_type operator() (
            const sample_type& a,
            const sample_type& b
        ) const;
        /*!
            requires
                - a is a sparse vector
                - b is a sparse vector
            ensures
                - returns tanh(gamma * dot(a,b) + coef)
        !*/

        sparse_sigmoid_kernel& operator= (
            const sparse_sigmoid_kernel& k
        );
        /*!
            ensures
                - #gamma = k.gamma
                - #coef = k.coef
                - returns *this
        !*/

        bool operator== (
            const sparse_sigmoid_kernel& k
        ) const;
        /*!
            ensures
                - if (k and *this are identical) then
                    - returns true
                - else
                    - returns false
        !*/
    };

    template <typename T>
    void serialize (
        const sparse_sigmoid_kernel<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for sparse_sigmoid_kernel
    !*/

    template <typename T>
    void deserialize (
        sparse_sigmoid_kernel<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support for sparse_sigmoid_kernel
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    struct sparse_polynomial_kernel
    {
        /*!
            REQUIREMENTS ON T
                Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object represents a polynomial kernel that works with sparse
                vectors.

            THREAD SAFETY
                This kernel is threadsafe.
        !*/

        typedef typename T::value_type::second_type scalar_type;
        typedef T sample_type;
        typedef default_memory_manager mem_manager_type;

        const scalar_type gamma;
        const scalar_type coef;
        const scalar_type degree;

        sparse_polynomial_kernel(
        );
        /*!
            ensures
                - #gamma == 1
                - #coef == 0
                - #degree == 1
        !*/

        sparse_polynomial_kernel(
            const sparse_polynomial_kernel& k
        );
        /*!
            ensures
                - #gamma == k.gamma
                - #coef == k.coef
                - #degree == k.degree
        !*/

        sparse_polynomial_kernel(
            const scalar_type g,
            const scalar_type c,
            const scalar_type d
        );
        /*!
            ensures
                - #gamma == g
                - #coef == c
                - #degree == d
        !*/

        scalar_type operator() (
            const sample_type& a,
            const sample_type& b
        ) const;
        /*!
            requires
                - a is a sparse vector
                - b is a sparse vector
            ensures
                - returns pow(gamma * dot(a,b) + coef, degree)
        !*/

        sparse_polynomial_kernel& operator= (
            const sparse_polynomial_kernel& k
        );
        /*!
            ensures
                - #gamma = k.gamma
                - #coef = k.coef
                - #degree = k.degree
                - returns *this
        !*/

        bool operator== (
            const sparse_polynomial_kernel& k
        ) const;
        /*!
            ensures
                - if (k and *this are identical) then
                    - returns true
                - else
                    - returns false
        !*/
    };

    template <typename T>
    void serialize (
        const sparse_polynomial_kernel<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for sparse_polynomial_kernel
    !*/

    template <typename T>
    void deserialize (
        sparse_polynomial_kernel<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support for sparse_polynomial_kernel
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    struct sparse_linear_kernel
    {
        /*!
            REQUIREMENTS ON T
                Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object represents a linear function kernel that works with
                sparse vectors.

            THREAD SAFETY
                This kernel is threadsafe.
        !*/

        typedef typename T::value_type::second_type scalar_type;
        typedef T sample_type;
        typedef default_memory_manager mem_manager_type;

        scalar_type operator() (
            const sample_type& a,
            const sample_type& b
        ) const;
        /*!
            requires
                - a is a sparse vector
                - b is a sparse vector
            ensures
                - returns dot(a,b)
        !*/

        bool operator== (
            const sparse_linear_kernel& k
        ) const;
        /*!
            ensures
                - returns true
        !*/
    };

    template <typename T>
    void serialize (
        const sparse_linear_kernel<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for sparse_linear_kernel
    !*/

    template <typename T>
    void deserialize (
        sparse_linear_kernel<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support for sparse_linear_kernel
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    struct sparse_histogram_intersection_kernel
    {
        /*!
            REQUIREMENTS ON T
                Must be a sparse vector as defined in dlib/svm/sparse_vector_abstract.h

            WHAT THIS OBJECT REPRESENTS
                This object represents a histogram intersection kernel that works
                with sparse vectors.

            THREAD SAFETY
                This kernel is threadsafe.
        !*/

        typedef typename T::value_type::second_type scalar_type;
        typedef T sample_type;
        typedef default_memory_manager mem_manager_type;

        scalar_type operator() (
            const sample_type& a,
            const sample_type& b
        ) const;
        /*!
            requires
                - a is a sparse vector
                - b is a sparse vector
                - all the values in a and b are >= 0
            ensures
                - Let A(i) denote the value of the ith dimension of the a vector.
                - Let B(i) denote the value of the ith dimension of the b vector.
                - returns sum over all i: std::min(A(i), B(i))
        !*/

        bool operator== (
            const sparse_histogram_intersection_kernel& k
        ) const;
        /*!
            ensures
                - returns true
        !*/
    };

    template <typename T>
    void serialize (
        const sparse_histogram_intersection_kernel<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support for sparse_histogram_intersection_kernel
    !*/

    template <typename T>
    void deserialize (
        sparse_histogram_intersection_kernel<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support for sparse_histogram_intersection_kernel
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SVm_SPARSE_KERNEL_ABSTRACT_
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVm_SPARSE_VECTOR +#define DLIB_SVm_SPARSE_VECTOR + +#include "sparse_vector_abstract.h" +#include <cmath> +#include <limits> +#include "../algs.h" +#include <vector> +#include <map> +#include "../graph_utils/edge_list_graphs.h" +#include "../matrix.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U> + typename T::value_type::second_type distance_squared ( + const T& a, + const U& b + ) + { + typedef typename T::value_type::second_type scalar_type; + typedef typename U::value_type::second_type scalar_typeU; + // Both T and U must contain the same kinds of elements + COMPILE_TIME_ASSERT((is_same_type<scalar_type, scalar_typeU>::value)); + + typename T::const_iterator ai = a.begin(); + typename U::const_iterator bi = b.begin(); + + scalar_type sum = 0, temp = 0; + while (ai != a.end() && bi != b.end()) + { + if (ai->first == bi->first) + { + temp = ai->second - bi->second; + ++ai; + ++bi; + } + else if (ai->first < bi->first) + { + temp = ai->second; + ++ai; + } + else + { + temp = bi->second; + ++bi; + } + + sum += temp*temp; + } + + while (ai != a.end()) + { + sum += ai->second*ai->second; + ++ai; + } + while (bi != b.end()) + { + sum += bi->second*bi->second; + ++bi; + } + + return sum; + } + +// ------------------------------------------------------------------------------------ + + template <typename T, typename U, typename V, typename W> + typename T::value_type::second_type distance_squared ( + const V& a_scale, + const T& a, + const W& b_scale, + const U& b + ) + { + typedef typename T::value_type::second_type scalar_type; + typedef typename U::value_type::second_type scalar_typeU; + // Both T and U must contain the same kinds of elements + COMPILE_TIME_ASSERT((is_same_type<scalar_type, scalar_typeU>::value)); + + typename T::const_iterator ai = 
a.begin(); + typename U::const_iterator bi = b.begin(); + + scalar_type sum = 0, temp = 0; + while (ai != a.end() && bi != b.end()) + { + if (ai->first == bi->first) + { + temp = a_scale*ai->second - b_scale*bi->second; + ++ai; + ++bi; + } + else if (ai->first < bi->first) + { + temp = a_scale*ai->second; + ++ai; + } + else + { + temp = b_scale*bi->second; + ++bi; + } + + sum += temp*temp; + } + + while (ai != a.end()) + { + sum += a_scale*a_scale*ai->second*ai->second; + ++ai; + } + while (bi != b.end()) + { + sum += b_scale*b_scale*bi->second*bi->second; + ++bi; + } + + return sum; + } + +// ------------------------------------------------------------------------------------ + + template <typename T, typename U> + typename T::value_type::second_type distance ( + const T& a, + const U& b + ) + { + return std::sqrt(distance_squared(a,b)); + } + +// ------------------------------------------------------------------------------------ + + template <typename T, typename U, typename V, typename W> + typename T::value_type::second_type distance ( + const V& a_scale, + const T& a, + const W& b_scale, + const U& b + ) + { + return std::sqrt(distance_squared(a_scale,a,b_scale,b)); + } + +// ------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------ + + template <typename T, typename EXP> + typename enable_if<is_matrix<T> >::type assign ( + T& dest, + const matrix_exp<EXP>& src + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(src), + "\t void assign(dest,src)" + << "\n\t the src matrix must be a row or column vector" + ); + + dest = src; + } + + template <typename T, typename EXP> + typename disable_if<is_matrix<T> >::type assign ( + T& dest, + const matrix_exp<EXP>& src + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(src), + "\t void assign(dest,src)" + << "\n\t the src matrix must be a row or column 
vector" + ); + + dest.clear(); + typedef typename T::value_type item_type; + for (long i = 0; i < src.size(); ++i) + { + dest.insert(dest.end(),item_type(i, src(i))); + } + } + + template <typename T, typename U> + typename disable_if_c<is_matrix<T>::value || is_matrix<U>::value>::type assign ( + T& dest, // sparse + const U& src // sparse + ) + { + dest.assign(src.begin(), src.end()); + } + + template <typename T, typename U, typename Comp, typename Alloc, typename S> + typename disable_if<is_matrix<S> >::type assign ( + std::map<T,U,Comp,Alloc>& dest, // sparse + const S& src // sparse + ) + { + dest.clear(); + dest.insert(src.begin(), src.end()); + } + +// ------------------------------------------------------------------------------------ + // ------------------------------------------------------------------------------------ + + template <typename T> + struct has_unsigned_keys + { + static const bool value = is_unsigned_type<typename T::value_type::first_type>::value; + }; + +// ------------------------------------------------------------------------------------ + + namespace impl + { + template <typename T, typename U> + typename T::value_type::second_type general_dot ( + const T& a, + const U& b + ) + { + typedef typename T::value_type::second_type scalar_type; + + typename T::const_iterator ai = a.begin(); + typename U::const_iterator bi = b.begin(); + + scalar_type sum = 0; + while (ai != a.end() && bi != b.end()) + { + if (ai->first == bi->first) + { + sum += ai->second * bi->second; + ++ai; + ++bi; + } + else if (ai->first < bi->first) + { + ++ai; + } + else + { + ++bi; + } + } + + return sum; + } + + template <typename T, typename U> + inline typename T::value_type::second_type dot ( + const T& a, + const U& b + ) + { + return general_dot(a,b); + } + + template <typename T, typename U, typename alloc> + U dot ( + const std::vector<std::pair<T,U>,alloc>& a, + const std::vector<std::pair<T,U>,alloc>& b + ) + { + // You are getting this error because you 
are attempting to use sparse sample vectors + // but you aren't using an unsigned integer as your key type in the sparse vectors. + COMPILE_TIME_ASSERT(is_unsigned_type<T>::value); + + if (a.size() == 0 || b.size() == 0) + return 0; + + // if a is really a dense vector but just represented in a sparse container + if (a.back().first == a.size()-1) + { + double sum = 0; + for (unsigned long i = 0; i < b.size(); ++i) + { + if (b[i].first >= a.size()) + break; + sum += a[b[i].first].second * b[i].second; + } + return sum; + } + // if b is really a dense vector but just represented in a sparse container + else if (b.back().first == b.size()-1) + { + double sum = 0; + for (unsigned long i = 0; i < a.size(); ++i) + { + if (a[i].first >= b.size()) + break; + sum += b[a[i].first].second * a[i].second; + } + return sum; + } + else + { + return general_dot(a,b); + } + } + } + + template <typename T> + inline typename T::value_type::second_type dot ( + const T& a, + const T& b + ) + { + return impl::dot(a,b); + } + + template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> + inline T4 dot ( + const std::vector<T1,T2>& a, + const std::map<T3,T4,T5,T6>& b + ) + { + return impl::dot(a,b); + } + + template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> + inline T4 dot ( + const std::map<T3,T4,T5,T6>& a, + const std::vector<T1,T2>& b + ) + { + return impl::dot(a,b); + } + +// ------------------------------------------------------------------------------------ + + template <typename T, typename EXP> + typename T::value_type::second_type dot ( + const T& a, + const matrix_exp<EXP>& b + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(b), + "\t scalar_type dot(sparse_vector a, dense_vector b)" + << "\n\t 'b' must be a vector to be used in a dot product." 
+ ); + + typedef typename T::value_type::second_type scalar_type; + typedef typename T::value_type::first_type first_type; + + scalar_type sum = 0; + for (typename T::const_iterator ai = a.begin(); + (ai != a.end()) && (ai->first < static_cast<first_type>(b.size())); + ++ai) + { + sum += ai->second * b(ai->first); + } + + return sum; + } + +// ------------------------------------------------------------------------------------ + + template <typename T, typename EXP> + typename T::value_type::second_type dot ( + const matrix_exp<EXP>& b, + const T& a + ) + { + return dot(a,b); + } + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename T::value_type::second_type length_squared ( + const T& a + ) + { + typedef typename T::value_type::second_type scalar_type; + + typename T::const_iterator i; + + scalar_type sum = 0; + + for (i = a.begin(); i != a.end(); ++i) + { + sum += i->second * i->second; + } + + return sum; + } + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename T::value_type::second_type length ( + const T& a + ) + { + return std::sqrt(length_squared(a)); + } + +// ------------------------------------------------------------------------------------ + + template <typename T, typename U> + typename disable_if<is_matrix<T>,void>::type scale_by ( + T& a, + const U& value + ) + { + for (typename T::iterator i = a.begin(); i != a.end(); ++i) + { + i->second *= value; + } + } + + template <typename T, typename U> + typename enable_if<is_matrix<T>,void>::type scale_by ( + T& a, + const U& value + ) + { + a *= value; + } + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename disable_if<is_matrix<T>,T>::type add ( + const T& a, + const T& b + ) + { + T temp; + + typename T::const_iterator i = a.begin(); + typename T::const_iterator j = b.begin(); + 
while (i != a.end() && j != b.end()) + { + if (i->first == j->first) + { + temp.insert(temp.end(), std::make_pair(i->first, i->second + j->second)); + ++i; + ++j; + } + else if (i->first < j->first) + { + temp.insert(temp.end(), *i); + ++i; + } + else + { + temp.insert(temp.end(), *j); + ++j; + } + } + + while (i != a.end()) + { + temp.insert(temp.end(), *i); + ++i; + } + while (j != b.end()) + { + temp.insert(temp.end(), *j); + ++j; + } + + return temp; + } + + template <typename T, typename U> + typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value, matrix_add_exp<T,U> >::type add ( + const T& a, + const U& b + ) + { + return matrix_add_exp<T,U>(a.ref(),b.ref()); + } + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename disable_if<is_matrix<T>,T>::type subtract ( + const T& a, + const T& b + ) + { + T temp; + + typename T::const_iterator i = a.begin(); + typename T::const_iterator j = b.begin(); + while (i != a.end() && j != b.end()) + { + if (i->first == j->first) + { + temp.insert(temp.end(), std::make_pair(i->first, i->second - j->second)); + ++i; + ++j; + } + else if (i->first < j->first) + { + temp.insert(temp.end(), *i); + ++i; + } + else + { + temp.insert(temp.end(), std::make_pair(j->first, -j->second)); + ++j; + } + } + + while (i != a.end()) + { + temp.insert(temp.end(), *i); + ++i; + } + while (j != b.end()) + { + temp.insert(temp.end(), std::make_pair(j->first, -j->second)); + ++j; + } + + return temp; + } + + template <typename T, typename U> + typename enable_if_c<is_matrix<T>::value && is_matrix<U>::value, matrix_subtract_exp<T,U> >::type subtract ( + const T& a, + const U& b + ) + { + return matrix_subtract_exp<T,U>(a.ref(),b.ref()); + } + +// ------------------------------------------------------------------------------------ +// ------------------------------------------------------------------------------------ + + namespace impl + { + template <typename T> + 
typename enable_if<is_matrix<typename T::type>,unsigned long>::type max_index_plus_one ( + const T& samples + ) + { + if (samples.size() > 0) + return samples(0).size(); + else + return 0; + } + + template <typename T> + typename enable_if<is_built_in_scalar_type<typename T::type>,unsigned long>::type max_index_plus_one ( + const T& sample + ) + { + return sample.size(); + } + + // This !is_built_in_scalar_type<typename T::type>::value is here to avoid an inexplicable bug in Vistual Studio 2005 + template <typename T> + typename enable_if_c<(!is_built_in_scalar_type<typename T::type>::value) && (is_pair<typename T::type::value_type>::value) ,unsigned long>::type + max_index_plus_one ( + const T& samples + ) + { + typedef typename T::type sample_type; + // You are getting this error because you are attempting to use sparse sample vectors + // but you aren't using an unsigned integer as your key type in the sparse vectors. + COMPILE_TIME_ASSERT(has_unsigned_keys<sample_type>::value); + + + // these should be sparse samples so look over all them to find the max index. 
+ unsigned long max_dim = 0; + for (long i = 0; i < samples.size(); ++i) + { + if (samples(i).size() > 0) + max_dim = std::max<unsigned long>(max_dim, (--samples(i).end())->first + 1); + } + + return max_dim; + } + } + + template <typename T> + typename enable_if<is_pair<typename T::value_type>,unsigned long>::type max_index_plus_one ( + const T& sample + ) + { + if (sample.size() > 0) + return (--sample.end())->first + 1; + return 0; + } + + template <typename T> + typename disable_if_c<is_pair<typename T::value_type>::value || + is_same_type<typename T::value_type,sample_pair>::value || + is_same_type<typename T::value_type,ordered_sample_pair>::value , unsigned long>::type + max_index_plus_one ( + const T& samples + ) + { + return impl::max_index_plus_one(mat(samples)); + } + +// ------------------------------------------------------------------------------------ + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP> + inline void add_to ( + matrix<T,NR,NC,MM,L>& dest, + const matrix_exp<EXP>& src + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void add_to(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." + << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (long r = 0; r < src.size(); ++r) + dest(r) += src(r); + } + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP> + inline typename disable_if<is_matrix<EXP> >::type add_to ( + matrix<T,NR,NC,MM,L>& dest, + const EXP& src + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void add_to(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." 
+ << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) + dest(i->first) += i->second; + } + +// ------------------------------------------------------------------------------------ + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> + inline void add_to ( + matrix<T,NR,NC,MM,L>& dest, + const matrix_exp<EXP>& src, + const U& C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void add_to(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." + << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (long r = 0; r < src.size(); ++r) + dest(r) += C*src(r); + } + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> + inline typename disable_if<is_matrix<EXP> >::type add_to ( + matrix<T,NR,NC,MM,L>& dest, + const EXP& src, + const U& C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void add_to(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." 
+ << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) + dest(i->first) += C*i->second; + } + +// ------------------------------------------------------------------------------------ + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP> + inline void subtract_from ( + matrix<T,NR,NC,MM,L>& dest, + const matrix_exp<EXP>& src + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void subtract_from(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." + << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (long r = 0; r < src.size(); ++r) + dest(r) -= src(r); + } + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP> + inline typename disable_if<is_matrix<EXP> >::type subtract_from ( + matrix<T,NR,NC,MM,L>& dest, + const EXP& src + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void subtract_from(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." 
+ << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) + dest(i->first) -= i->second; + } + +// ------------------------------------------------------------------------------------ + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> + inline void subtract_from ( + matrix<T,NR,NC,MM,L>& dest, + const matrix_exp<EXP>& src, + const U& C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void subtract_from(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." + << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (long r = 0; r < src.size(); ++r) + dest(r) -= C*src(r); + } + + template <typename T, long NR, long NC, typename MM, typename L, typename EXP, typename U> + inline typename disable_if<is_matrix<EXP> >::type subtract_from ( + matrix<T,NR,NC,MM,L>& dest, + const EXP& src, + const U& C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(dest) && max_index_plus_one(src) <= static_cast<unsigned long>(dest.size()), + "\t void subtract_from(dest,src)" + << "\n\t dest must be a vector large enough to hold the src vector." 
+ << "\n\t is_vector(dest): " << is_vector(dest) + << "\n\t max_index_plus_one(src): " << max_index_plus_one(src) + << "\n\t dest.size(): " << dest.size() + ); + + for (typename EXP::const_iterator i = src.begin(); i != src.end(); ++i) + dest(i->first) -= C*i->second; + } + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename T::value_type::second_type min ( + const T& a + ) + { + typedef typename T::value_type::second_type type; + + type temp = 0; + for (typename T::const_iterator i = a.begin(); i != a.end(); ++i) + { + if (temp > i->second) + temp = i->second; + } + return temp; + } + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename T::value_type::second_type max ( + const T& a + ) + { + typedef typename T::value_type::second_type type; + + type temp = 0; + for (typename T::const_iterator i = a.begin(); i != a.end(); ++i) + { + if (temp < i->second) + temp = i->second; + } + return temp; + } + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template <typename sparse_vector_type> + inline matrix<typename sparse_vector_type::value_type::second_type,0,1> sparse_to_dense ( + const sparse_vector_type& vect, + unsigned long num_dimensions + ) + { + // You must use unsigned integral key types in your sparse vectors + typedef typename sparse_vector_type::value_type::first_type idx_type; + typedef typename sparse_vector_type::value_type::second_type value_type; + COMPILE_TIME_ASSERT(is_unsigned_type<idx_type>::value); + + matrix<value_type,0,1> result; + + if (vect.size() == 0) + return result; + + result.set_size(num_dimensions); + result = 0; + + for (typename sparse_vector_type::const_iterator j = vect.begin(); j != vect.end(); ++j) + { + if ((long)(j->first) < result.size()) + { + result(j->first) += j->second; + } + } + + return result; + } + } + +// 
----------------------------------------------------------------------------------------
+
+ // sparse_to_dense() for a std::vector-of-pairs sparse vector with an
+ // explicit output dimensionality.
+ template <typename idx_type, typename value_type, typename alloc>
+ matrix<value_type,0,1> sparse_to_dense (
+ const std::vector<std::pair<idx_type,value_type>,alloc>& vect,
+ unsigned long num_dimensions
+ )
+ {
+ return impl::sparse_to_dense(vect,num_dimensions);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Same as above, but the dimensionality is inferred from the largest key
+ // present (max_index_plus_one).
+ template <typename idx_type, typename value_type, typename alloc>
+ matrix<value_type,0,1> sparse_to_dense (
+ const std::vector<std::pair<idx_type,value_type>,alloc>& vect
+ )
+ {
+ return impl::sparse_to_dense(vect, max_index_plus_one(vect));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // sparse_to_dense() for std::map-based sparse vectors, explicit dimensionality.
+ template <typename T1, typename T2, typename T3, typename T4>
+ matrix<T2,0,1> sparse_to_dense (
+ const std::map<T1,T2,T3,T4>& vect,
+ unsigned long num_dimensions
+ )
+ {
+ return impl::sparse_to_dense(vect,num_dimensions);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // sparse_to_dense() for std::map-based sparse vectors, inferred dimensionality.
+ template <typename T1, typename T2, typename T3, typename T4>
+ matrix<T2,0,1> sparse_to_dense (
+ const std::map<T1,T2,T3,T4>& vect
+ )
+ {
+ return impl::sparse_to_dense(vect, max_index_plus_one(vect));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Already-dense inputs pass straight through unchanged.
+ template <typename T>
+ typename enable_if<is_matrix<T>,T&>::type sparse_to_dense(
+ T& item
+ ) { return item; }
+
+ // Dense input with a requested dimensionality: zero-pad (join_cols) if too
+ // short, truncate (colm) if too long, pass through if the size already matches.
+ template <typename EXP>
+ matrix<typename EXP::type,0,1> sparse_to_dense(
+ const matrix_exp<EXP>& item,
+ unsigned long num
+ )
+ {
+ typedef typename EXP::type type;
+ if (item.size() == (long)num)
+ return item;
+ else if (item.size() < (long)num)
+ return join_cols(item, zeros_matrix<type>((long)num-item.size(),1));
+ else
+ return colm(item,0,(long)num);
+ }
+
+// 
----------------------------------------------------------------------------------------
+
+ // Convert a whole array of sparse (or dense) sample vectors into dense
+ // column vectors, each of length num_dimensions.
+ template <typename sample_type, typename alloc>
+ std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense (
+ const std::vector<sample_type, alloc>& samples,
+ unsigned long num_dimensions
+ )
+ {
+ typedef typename sample_type::value_type pair_type;
+ typedef typename pair_type::second_type value_type;
+
+ std::vector< matrix<value_type,0,1> > result;
+
+ // now turn all the samples into dense samples
+ result.resize(samples.size());
+
+ for (unsigned long i = 0; i < samples.size(); ++i)
+ {
+ result[i] = sparse_to_dense(samples[i],num_dimensions);
+ }
+
+ return result;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Same as above with the shared dimensionality inferred from the whole set,
+ // so every output vector has the same length.
+ template <typename sample_type, typename alloc>
+ std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense (
+ const std::vector<sample_type, alloc>& samples
+ )
+ {
+ return sparse_to_dense(samples, max_index_plus_one(samples));
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Return a properly sorted, duplicate-free copy of the unsorted sparse
+ // vector v. Duplicate keys are merged by summing their values via an
+ // intermediate std::map, then copied back into a T.
+ template <
+ typename T
+ >
+ T make_sparse_vector (
+ const T& v
+ )
+ {
+ // You must use unsigned integral key types in your sparse vectors
+ typedef typename T::value_type::first_type idx_type;
+ typedef typename T::value_type::second_type value_type;
+ COMPILE_TIME_ASSERT(is_unsigned_type<idx_type>::value);
+ std::map<idx_type,value_type> temp;
+ for (typename T::const_iterator i = v.begin(); i != v.end(); ++i)
+ {
+ temp[i->first] += i->second;
+ }
+
+ return T(temp.begin(), temp.end());
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Generic in-place version: just rebuild via make_sparse_vector(). The
+ // std::vector overload below is an optimized specialization of this.
+ template <
+ typename T
+ >
+ void make_sparse_vector_inplace(
+ T& vect
+ )
+ {
+ vect = make_sparse_vector(vect);
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Optimized in-place version for std::vector<std::pair<>>: sort by key,
+ // then, starting from the first duplicate found, do a single compacting
+ // pass that sums runs of equal keys and shifts unique entries left.
+ template <
+ typename T,
+ typename U,
+ typename alloc
+ >
+ void make_sparse_vector_inplace (
+ std::vector<std::pair<T,U>,alloc>& vect
+ )
+ {
+ if (vect.size() > 0)
+ {
+ std::sort(vect.begin(), vect.end());
+
+ // merge duplicates
+ for (unsigned long i = 1; i < vect.size(); ++i)
+ {
+ // if we found a duplicate
+ if (vect[i-1].first == vect[i].first)
+ {
+ // now start collapsing and merging the vector
+ unsigned long j = i-1;
+ for (unsigned long k = i; k < vect.size(); ++k)
+ {
+ if (vect[j].first == vect[k].first)
+ {
+ vect[j].second += vect[k].second;
+ }
+ else
+ {
+ ++j;
+ vect[j] = vect[k];
+ }
+ }
+
+
+ // we removed elements when we merged so we need to adjust the size.
+ vect.resize(j+1);
+ return;
+ }
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Multiply the symmetric sparse matrix described by edges with the column
+ // vector v, storing the product in result. Each edge (i,j) contributes to
+ // both result(i) and result(j); the i != j guard keeps diagonal entries
+ // from being added twice.
+ template <typename EXP, typename T, long NR, long NC, typename MM, typename L>
+ void sparse_matrix_vector_multiply (
+ const std::vector<sample_pair>& edges,
+ const matrix_exp<EXP>& v,
+ matrix<T,NR,NC,MM,L>& result
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(max_index_plus_one(edges) <= (unsigned long)v.size() &&
+ is_col_vector(v),
+ "\t void sparse_matrix_vector_multiply()"
+ << "\n\t Invalid inputs were given to this function"
+ << "\n\t max_index_plus_one(edges): " << max_index_plus_one(edges)
+ << "\n\t v.size(): " << v.size()
+ << "\n\t is_col_vector(v): " << is_col_vector(v)
+ );
+
+ result.set_size(v.nr(),v.nc());
+ result = 0;
+
+ for (unsigned long k = 0; k < edges.size(); ++k)
+ {
+ const long i = edges[k].index1();
+ const long j = edges[k].index2();
+ const double d = edges[k].distance();
+
+ result(i) += v(j)*d;
+ if (i != j)
+ result(j) += v(i)*d;
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience wrapper: same multiply as above but returns the result by value.
+ template <typename EXP>
+ matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply (
+ const std::vector<sample_pair>& edges,
+ const 
matrix_exp<EXP>& v
+ )
+ {
+ matrix<typename EXP::type,0,1> result;
+ sparse_matrix_vector_multiply(edges,v,result);
+ return result;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Directed (non-symmetric) variant: edges are ordered_sample_pair objects,
+ // so each edge (i,j) contributes only to result(i). Compare with the
+ // symmetric sample_pair overload above, which also updates result(j).
+ template <typename EXP, typename T, long NR, long NC, typename MM, typename L>
+ void sparse_matrix_vector_multiply (
+ const std::vector<ordered_sample_pair>& edges,
+ const matrix_exp<EXP>& v,
+ matrix<T,NR,NC,MM,L>& result
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(max_index_plus_one(edges) <= (unsigned long)v.size() &&
+ is_col_vector(v),
+ "\t void sparse_matrix_vector_multiply()"
+ << "\n\t Invalid inputs were given to this function"
+ << "\n\t max_index_plus_one(edges): " << max_index_plus_one(edges)
+ << "\n\t v.size(): " << v.size()
+ << "\n\t is_col_vector(v): " << is_col_vector(v)
+ );
+
+
+ result.set_size(v.nr(),v.nc());
+ result = 0;
+
+ for (unsigned long k = 0; k < edges.size(); ++k)
+ {
+ const long i = edges[k].index1();
+ const long j = edges[k].index2();
+ const double d = edges[k].distance();
+
+ result(i) += v(j)*d;
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience wrapper for the directed variant; returns the product by value.
+ template <typename EXP>
+ matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply (
+ const std::vector<ordered_sample_pair>& edges,
+ const matrix_exp<EXP>& v
+ )
+ {
+ matrix<typename EXP::type,0,1> result;
+ sparse_matrix_vector_multiply(edges,v,result);
+ return result;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Dense matrix times sparse vector: result = m*v. Iterates the sparse
+ // entries of v and accumulates the corresponding column of m scaled by the
+ // entry's value. NOTE(review): the assert message prints m.size() but the
+ // requirement being checked is on m.nc().
+ template <
+ typename EXP,
+ typename sparse_vector_type,
+ typename T,
+ long NR,
+ long NC,
+ typename MM,
+ typename L
+ >
+ void sparse_matrix_vector_multiply (
+ const matrix_exp<EXP>& m,
+ const sparse_vector_type& v,
+ matrix<T,NR,NC,MM,L>& result
+ )
+ {
+ // make sure requires clause is not broken
+ DLIB_ASSERT(max_index_plus_one(v) <= (unsigned long)m.nc(),
+ "\t void sparse_matrix_vector_multiply()"
+ << "\n\t Invalid inputs were given to this function"
+ << "\n\t max_index_plus_one(v): " << max_index_plus_one(v)
+ << "\n\t m.size(): " << m.size()
+ );
+
+ result.set_size(m.nr(),1);
+ result = 0;
+
+ for (typename sparse_vector_type::const_iterator i = v.begin(); i != v.end(); ++i)
+ {
+ for (long r = 0; r < result.nr(); ++r)
+ {
+ result(r) += m(r, i->first)*i->second;
+ }
+ }
+ }
+
+// ----------------------------------------------------------------------------------------
+
+ // Convenience wrapper for the dense-matrix-times-sparse-vector multiply.
+ template <
+ typename EXP,
+ typename sparse_vector_type
+ >
+ matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply (
+ const matrix_exp<EXP>& m,
+ const sparse_vector_type& v
+ )
+ {
+ matrix<typename EXP::type,0,1> result;
+ sparse_matrix_vector_multiply(m,v,result);
+ return result;
+ }
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVm_SPARSE_VECTOR
+
diff --git a/ml/dlib/dlib/svm/sparse_vector_abstract.h b/ml/dlib/dlib/svm/sparse_vector_abstract.h
new file mode 100644
index 000000000..e0c8d1f8c
--- /dev/null
+++ b/ml/dlib/dlib/svm/sparse_vector_abstract.h
@@ -0,0 +1,688 @@
+// Copyright (C) 2009 Davis E. King (davis@dlib.net)
+// License: Boost Software License See LICENSE.txt for the full license.
+#undef DLIB_SVm_SPARSE_VECTOR_ABSTRACT_
+#ifdef DLIB_SVm_SPARSE_VECTOR_ABSTRACT_
+
+#include <cmath>
+#include "../algs.h"
+#include "../serialize.h"
+#include "../matrix.h"
+#include <map>
+#include <vector>
+#include "../graph_utils/sample_pair_abstract.h"
+#include "../graph_utils/ordered_sample_pair_abstract.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+ /*!A sparse_vectors
+
+ In dlib, sparse vectors are represented using the container objects
+ in the C++ STL. 
In particular, a sparse vector is any container that + contains a range of std::pair<key, scalar_value> objects where: + - key is an unsigned integral type + - scalar_value is float, double, or long double + - the std::pair objects have unique key values + - the std::pair objects are sorted such that small keys come first + + Therefore, if an object satisfies the above requirements we call it a + "sparse vector". Additionally, we define the concept of an "unsorted sparse vector" + to be a sparse vector that doesn't necessarily have sorted or unique key values. + Therefore, all sparse vectors are valid unsorted sparse vectors but not the other + way around. + + An unsorted sparse vector with duplicate keys is always interpreted as + a vector where each dimension contains the sum of all corresponding elements + of the unsorted sparse vector. For example, an unsorted sparse vector + with the elements { (3,1), (0, 4), (3,5) } represents the 4D vector: + [4, 0, 0, 1+5] + + + + Examples of valid sparse vectors are: + - std::map<unsigned long, double> + - std::vector<std::pair<unsigned long, float> > where the vector is sorted. + (you could make sure it was sorted by applying std::sort to it) + + + Finally, by "dense vector" we mean a dlib::matrix object which represents + either a row or column vector. + + The rest of this file defines a number of helper functions for doing normal + vector arithmetic things with sparse vectors. + !*/ + +// ---------------------------------------------------------------------------------------- + + /*!A has_unsigned_keys + + This is a template where has_unsigned_keys<T>::value == true when T is a + sparse vector that contains unsigned integral keys and false otherwise. 
+ !*/ + + template <typename T> + struct has_unsigned_keys + { + static const bool value = is_unsigned_type<typename T::value_type::first_type>::value; + }; + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U> + typename T::value_type::second_type distance_squared ( + const T& a, + const U& b + ); + /*! + requires + - a and b are sparse vectors + ensures + - returns the squared distance between the vectors + a and b + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U, typename V, typename W> + typename T::value_type::second_type distance_squared ( + const V& a_scale, + const T& a, + const W& b_scale, + const U& b + ); + /*! + requires + - a and b are sparse vectors + ensures + - returns the squared distance between the vectors + a_scale*a and b_scale*b + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U> + typename T::value_type::second_type distance ( + const T& a, + const U& b + ); + /*! + requires + - a and b are sparse vectors + ensures + - returns the distance between the vectors + a and b. (i.e. std::sqrt(distance_squared(a,b))) + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U, typename V, typename W> + typename T::value_type::second_type distance ( + const V& a_scale, + const T& a, + const W& b_scale, + const U& b + ); + /*! + requires + - a and b are sparse vectors + ensures + - returns the distance between the vectors + a_scale*a and b_scale*b. (i.e. std::sqrt(distance_squared(a_scale,a,b_scale,b))) + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U> + void assign ( + T& dest, + const U& src + ); + /*! 
+ requires
+ - dest == a sparse vector or a dense vector
+ - src == a sparse vector or a dense vector
+ - dest is not dense when src is sparse
+ (i.e. you can't assign a sparse vector to a dense vector. This is
+ because we don't know what the proper dimensionality should be for the
+ dense vector)
+ ensures
+ - #dest represents the same vector as src.
+ (conversion between sparse/dense formats is done automatically)
+ !*/
+
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T>
+ typename T::value_type::second_type dot (
+ const T& a,
+ const T& b
+ );
+ /*!
+ requires
+ - a and b are sparse vectors
+ ensures
+ - returns the dot product between the vectors a and b
+ !*/
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+ T4 dot (
+ const std::vector<T1,T2>& a,
+ const std::map<T3,T4,T5,T6>& b
+ );
+ /*!
+ requires
+ - a and b are sparse vectors
+ ensures
+ - returns the dot product between the vectors a and b
+ !*/
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+ T4 dot (
+ const std::map<T3,T4,T5,T6>& a,
+ const std::vector<T1,T2>& b
+ );
+ /*!
+ requires
+ - a and b are sparse vectors
+ ensures
+ - returns the dot product between the vectors a and b
+ !*/
+
+// ----------------------------------------------------------------------------------------
+
+ template <typename T, typename EXP>
+ typename T::value_type::second_type dot (
+ const T& a,
+ const matrix_exp<EXP>& b
+ );
+ /*!
+ requires
+ - a is an unsorted sparse vector
+ - is_vector(b) == true
+ ensures
+ - returns the dot product between the vectors a and b.
+ - if (max_index_plus_one(a) >= b.size()) then
+ - a's dimensionality is greater than b's dimensionality. In this case we
+ pretend b is padded by as many zeros as is needed to make the dot product
+ work. So this means that any elements in a that go beyond the length of
+ b are simply ignored. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename EXP> + typename T::value_type::second_type dot ( + const matrix_exp<EXP>& a, + const T& b + ); + /*! + requires + - b is an unsorted sparse vector + - is_vector(a) == true + ensures + - returns the dot product between the vectors a and b + - if (max_index_plus_one(b) >= a.size()) then + - b's dimensionality is greater than a's dimensionality. In this case we + pretend a is padded by as many zeros as is needed to make the dot product + work. So this means that any elements in b that go beyond the length of + a are simply ignored. + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + typename T::value_type::second_type length_squared ( + const T& a + ); + /*! + requires + - a is a sparse vector + ensures + - returns dot(a,a) + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + typename T::value_type::second_type length ( + const T& a + ); + /*! + requires + - a is a sparse vector + ensures + - returns std::sqrt(length_squared(a,a)) + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename U> + void scale_by ( + T& a, + const U& value + ); + /*! + requires + - a is an unsorted sparse vector or a dlib::matrix + ensures + - #a == a*value + (i.e. multiplies every element of the vector a by value) + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + T add ( + const T& a, + const T& b + ); + /*! + requires + - a is a sparse vector or dlib::matrix + - b is a sparse vector or dlib::matrix + ensures + - returns a vector or matrix which represents a+b. If the inputs are + sparse vectors then the result is a sparse vector. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + T subtract ( + const T& a, + const T& b + ); + /*! + requires + - a is a sparse vector or dlib::matrix + - b is a sparse vector or dlib::matrix + ensures + - returns a vector or matrix which represents a-b. If the inputs are + sparse vectors then the result is a sparse vector. + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + unsigned long max_index_plus_one ( + const T& samples + ); + /*! + requires + - samples == a single vector (either sparse or dense), or a container + of vectors which is either a dlib::matrix of vectors or something + convertible to a dlib::matrix via mat() (e.g. a std::vector) + Valid types of samples include (but are not limited to): + - dlib::matrix<double,0,1> // A single dense vector + - std::map<unsigned int, double> // A single sparse vector + - std::vector<dlib::matrix<double,0,1> > // An array of dense vectors + - std::vector<std::map<unsigned int, double> > // An array of sparse vectors + ensures + - This function tells you the dimensionality of a set of vectors. The vectors + can be either sparse or dense. + - if (samples.size() == 0) then + - returns 0 + - else if (samples contains dense vectors or is a dense vector) then + - returns the number of elements in the first sample vector. This means + we implicitly assume all dense vectors have the same length) + - else + - In this case samples contains sparse vectors or is a sparse vector. + - returns the largest element index in any sample + 1. Note that the element index values + are the values stored in std::pair::first. So this number tells you the dimensionality + of a set of sparse vectors. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, long NR, long NC, typename MM, typename L, typename SRC, typename U> + inline void add_to ( + matrix<T,NR,NC,MM,L>& dest, + const SRC& src, + const U& C = 1 + ); + /*! + requires + - SRC == a matrix expression or an unsorted sparse vector + - is_vector(dest) == true + - Let MAX denote the largest element index in src. + Then we require that: + - MAX < dest.size() + - (i.e. dest needs to be big enough to contain all the elements of src) + ensures + - #dest == dest + C*src + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, long NR, long NC, typename MM, typename L, typename SRC, typename U> + inline void subtract_from ( + matrix<T,NR,NC,MM,L>& dest, + const SRC& src, + const U& C = 1 + ); + /*! + requires + - SRC == a matrix expression or an unsorted sparse vector + - is_vector(dest) == true + - Let MAX denote the largest element index in src. + Then we require that: + - MAX < dest.size() + - (i.e. dest needs to be big enough to contain all the elements of src) + ensures + - #dest == dest - C*src + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T> + typename T::value_type::second_type min ( + const T& vect + ); + /*! + requires + - T == an unsorted sparse vector + ensures + - returns the minimum value in the sparse vector vect. Note that + this value is always <= 0 since a sparse vector has an unlimited number + of 0 elements. + !*/ + +// ------------------------------------------------------------------------------------ + + template <typename T> + typename T::value_type::second_type max ( + const T& vect + ); + /*! + requires + - T == an unsorted sparse vector + ensures + - returns the maximum value in the sparse vector vect. 
Note that + this value is always >= 0 since a sparse vector has an unlimited number + of 0 elements. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_type + > + matrix<typename sample_type::value_type::second_type,0,1> sparse_to_dense ( + const sample_type& vect + ); + /*! + requires + - vect must be a sparse vector or a dense column vector. + ensures + - converts the single sparse or dense vector vect to a dense (column matrix form) + representation. That is, this function returns a vector V such that: + - V.size() == max_index_plus_one(vect) + - for all valid j: + - V(j) == The value of the j'th dimension of the vector vect. Note + that V(j) is zero if it is a sparse vector that doesn't contain an + entry for the j'th dimension. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_type + > + matrix<typename sample_type::value_type::second_type,0,1> sparse_to_dense ( + const sample_type& vect, + unsigned long num_dimensions + ); + /*! + requires + - vect must be a sparse vector or a dense column vector. + ensures + - converts the single sparse or dense vector vect to a dense (column matrix form) + representation. That is, this function returns a vector V such that: + - V.size() == num_dimensions + - for all valid j: + - V(j) == The value of the j'th dimension of the vector vect. Note + that V(j) is zero if it is a sparse vector that doesn't contain an + entry for the j'th dimension. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_type, + typename alloc + > + std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense ( + const std::vector<sample_type, alloc>& samples + ); + /*! + requires + - all elements of samples must be sparse vectors or dense column vectors. 
+ ensures + - converts from sparse sample vectors to dense (column matrix form) + - That is, this function returns a std::vector R such that: + - R contains column matrices + - R.size() == samples.size() + - for all valid i: + - R[i] == sparse_to_dense(samples[i], max_index_plus_one(samples)) + (i.e. the dense (i.e. dlib::matrix) version of the sparse sample + given by samples[i].) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sample_type, + typename alloc + > + std::vector<matrix<typename sample_type::value_type::second_type,0,1> > sparse_to_dense ( + const std::vector<sample_type, alloc>& samples, + unsigned long num_dimensions + ); + /*! + requires + - all elements of samples must be sparse vectors or dense column vectors. + ensures + - converts from sparse sample vectors to dense (column matrix form) + - That is, this function returns a std::vector R such that: + - R contains column matrices + - R.size() == samples.size() + - for all valid i: + - R[i] == sparse_to_dense(samples[i], num_dimensions) + (i.e. the dense (i.e. dlib::matrix) version of the sparse sample + given by samples[i].) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + T make_sparse_vector ( + const T& v + ); + /*! + requires + - v is an unsorted sparse vector + ensures + - returns a copy of v which is a sparse vector. + (i.e. it will be properly sorted and not have any duplicate key values but + will still logically represent the same vector). + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void make_sparse_vector_inplace( + T& vect + ); + /*! 
+ requires + - v is an unsorted sparse vector + ensures + - vect == make_sparse_vector(vect) + - This function is just an optimized version of make_sparse_vector(), in + particular, when T is a std::vector<std::pair<>> type it is much more + efficient. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP, + typename T, + long NR, + long NC, + typename MM, + typename L + > + void sparse_matrix_vector_multiply ( + const std::vector<sample_pair>& edges, + const matrix_exp<EXP>& v, + matrix<T,NR,NC,MM,L>& result + ); + /*! + requires + - is_col_vector(v) == true + - max_index_plus_one(edges) <= v.size() + ensures + - Interprets edges as representing a symmetric sparse matrix M. The elements + of M are defined such that, for all valid i,j: + - M(i,j) == sum of edges[k].distance() for all k where edges[k]==sample_pair(i,j) + - This means that any element of M that doesn't have any edges associated + with it will have a value of 0. + - #result == M*v + (i.e. this function multiplies the vector v with the sparse matrix + represented by edges and stores the output into result) + - get_rect(#result) == get_rect(v) + (i.e. result will have the same dimensions as v) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP, + typename T, + long NR, + long NC, + typename MM, + typename L + > + void sparse_matrix_vector_multiply ( + const std::vector<ordered_sample_pair>& edges, + const matrix_exp<EXP>& v, + matrix<T,NR,NC,MM,L>& result + ); + /*! + requires + - is_col_vector(v) == true + - max_index_plus_one(edges) <= v.size() + ensures + - Interprets edges as representing a square sparse matrix M. 
The elements of M + are defined such that, for all valid i,j: + - M(i,j) == sum of edges[k].distance() for all k where edges[k]==ordered_sample_pair(i,j) + - This means that any element of M that doesn't have any edges associated + with it will have a value of 0. + - #result == M*v + (i.e. this function multiplies the vector v with the sparse matrix + represented by edges and stores the output into result) + - get_rect(#result) == get_rect(v) + (i.e. result will have the same dimensions as v) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP + > + matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( + const std::vector<sample_pair>& edges, + const matrix_exp<EXP>& v + ); + /*! + requires + - is_col_vector(v) == true + - max_index_plus_one(edges) <= v.size() + ensures + - This is just a convenience routine for invoking the above + sparse_matrix_vector_multiply() routine. In particular, it just calls + sparse_matrix_vector_multiply() with a temporary result matrix and then + returns the result. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP + > + matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( + const std::vector<ordered_sample_pair>& edges, + const matrix_exp<EXP>& v + ); + /*! + requires + - is_col_vector(v) == true + - max_index_plus_one(edges) <= v.size() + ensures + - This is just a convenience routine for invoking the above + sparse_matrix_vector_multiply() routine. In particular, it just calls + sparse_matrix_vector_multiply() with a temporary result matrix and then + returns the result. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP, + typename sparse_vector_type, + typename T, + long NR, + long NC, + typename MM, + typename L + > + void sparse_matrix_vector_multiply ( + const matrix_exp<EXP>& m, + const sparse_vector_type& v, + matrix<T,NR,NC,MM,L>& result + ); + /*! + requires + - max_index_plus_one(v) <= m.nc() + - v == an unsorted sparse vector + ensures + - #result == m*v + (i.e. multiply m by the vector v and store the output in result) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename EXP, + typename sparse_vector_type + > + matrix<typename EXP::type,0,1> sparse_matrix_vector_multiply ( + const matrix_exp<EXP>& m, + const sparse_vector_type& v + ); + /*! + requires + - max_index_plus_one(v) <= m.nc() + - v == an unsorted sparse vector + ensures + - returns m*v + (i.e. multiply m by the vector v and return the resulting vector) + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_SPARSE_VECTOR_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/structural_assignment_trainer.h b/ml/dlib/dlib/svm/structural_assignment_trainer.h new file mode 100644 index 000000000..d55b74ff0 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_assignment_trainer.h @@ -0,0 +1,294 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_Hh_ +#define DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_Hh_ + +#include "structural_assignment_trainer_abstract.h" +#include "../algs.h" +#include "../optimization.h" +#include "structural_svm_assignment_problem.h" +#include "num_nonnegative_weights.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_assignment_trainer + { + public: + typedef typename feature_extractor::lhs_element lhs_element; + typedef typename feature_extractor::rhs_element rhs_element; + typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; + typedef std::vector<long> label_type; + typedef assignment_function<feature_extractor> trained_function_type; + + structural_assignment_trainer ( + ) + { + set_defaults(); + } + + explicit structural_assignment_trainer ( + const feature_extractor& fe_ + ) : fe(fe_) + { + set_defaults(); + } + + const feature_extractor& get_feature_extractor ( + ) const { return fe; } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void structural_assignment_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + double get_epsilon ( + ) const { return eps; } + + void set_max_cache_size ( + unsigned long max_size + ) + { + max_cache_size = max_size; + } + + unsigned long get_max_cache_size ( + ) const + { + return max_cache_size; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + 
void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_assignment_trainer::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + double get_c ( + ) const + { + return C; + } + + bool forces_assignment( + ) const { return force_assignment; } + + void set_forces_assignment ( + bool new_value + ) + { + force_assignment = new_value; + } + + void set_loss_per_false_association ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_assignment_trainer::set_loss_per_false_association(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_false_association = loss; + } + + double get_loss_per_false_association ( + ) const + { + return loss_per_false_association; + } + + void set_loss_per_missed_association ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_assignment_trainer::set_loss_per_missed_association(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_missed_association = loss; + } + + double get_loss_per_missed_association ( + ) const + { + return loss_per_missed_association; + } + + bool forces_last_weight_to_1 ( + ) const + { + return last_weight_1; + } + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ) + { + last_weight_1 = should_last_weight_be_1; + } + + const assignment_function<feature_extractor> train ( + const std::vector<sample_type>& samples, + const std::vector<label_type>& labels + ) const + { + // make sure requires clause is not broken +#ifdef ENABLE_ASSERTS + if (force_assignment) + { + DLIB_ASSERT(is_forced_assignment_problem(samples, labels), + "\t assignment_function structural_assignment_trainer::train()" + << "\n\t 
invalid inputs were given to this function" + << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels) + << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) + << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) + ); + } + else + { + DLIB_ASSERT(is_assignment_problem(samples, labels), + "\t assignment_function structural_assignment_trainer::train()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels) + << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels) + ); + } +#endif + + + + structural_svm_assignment_problem<feature_extractor> prob(samples,labels, fe, force_assignment, num_threads, + loss_per_false_association, loss_per_missed_association); + + if (verbose) + prob.be_verbose(); + + prob.set_c(C); + prob.set_epsilon(eps); + prob.set_max_cache_size(max_cache_size); + + matrix<double,0,1> weights; + + // Take the min here because we want to prevent the user from accidentally + // forcing the bias term to be non-negative. 
+ const unsigned long num_nonneg = std::min(fe.num_features(),num_nonnegative_weights(fe)); + if (last_weight_1) + solver(prob, weights, num_nonneg, fe.num_features()-1); + else + solver(prob, weights, num_nonneg); + + const double bias = weights(weights.size()-1); + return assignment_function<feature_extractor>(colm(weights,0,weights.size()-1), bias,fe,force_assignment); + + } + + + private: + + bool force_assignment; + double C; + oca solver; + double eps; + bool verbose; + unsigned long num_threads; + unsigned long max_cache_size; + double loss_per_false_association; + double loss_per_missed_association; + bool last_weight_1; + + void set_defaults () + { + force_assignment = false; + C = 100; + verbose = false; + eps = 0.01; + num_threads = 2; + max_cache_size = 5; + loss_per_false_association = 1; + loss_per_missed_association = 1; + last_weight_1 = false; + } + + feature_extractor fe; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_Hh_ + + + + diff --git a/ml/dlib/dlib/svm/structural_assignment_trainer_abstract.h b/ml/dlib/dlib/svm/structural_assignment_trainer_abstract.h new file mode 100644 index 000000000..ebd402d42 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_assignment_trainer_abstract.h @@ -0,0 +1,299 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_ABSTRACT_Hh_ + +#include "../algs.h" +#include "structural_svm_assignment_problem.h" +#include "assignment_function_abstract.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_assignment_trainer + { + /*! 
+ REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to solve an assignment problem based + on a training dataset of example assignments. The training procedure produces an + assignment_function object which can be used to predict the assignments of + new data. + + Note that this is just a convenience wrapper around the + structural_svm_assignment_problem to make it look + similar to all the other trainers in dlib. + !*/ + + public: + typedef typename feature_extractor::lhs_element lhs_element; + typedef typename feature_extractor::rhs_element rhs_element; + typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type; + typedef std::vector<long> label_type; + typedef assignment_function<feature_extractor> trained_function_type; + + structural_assignment_trainer ( + ); + /*! + ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.01 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #get_feature_extractor() == a default initialized feature_extractor + - #forces_assignment() == false + - #get_loss_per_false_association() == 1 + - #get_loss_per_missed_association() == 1 + - #forces_last_weight_to_1() == false + !*/ + + explicit structural_assignment_trainer ( + const feature_extractor& fe + ); + /*! + ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.01 + - #get_num_threads() == 2 + - #get_max_cache_size() == 40 + - #get_feature_extractor() == fe + - #forces_assignment() == false + - #get_loss_per_false_association() == 1 + - #get_loss_per_missed_association() == 1 + - #forces_last_weight_to_1() == false + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! 
+ ensures + - returns the feature extractor used by this object + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to train. You can think of this epsilon value as saying "solve the + optimization problem until the average number of assignment mistakes per + training sample is within epsilon of its optimal value". + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - During training, this object basically runs the assignment_function on + each training sample, over and over. To speed this up, it is possible to + cache the results of these invocations. This function returns the number + of cache elements per training sample kept in the cache. Note that a value + of 0 means caching is not used at all. + !*/ + + void set_loss_per_false_association ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_false_association() == loss + !*/ + + double get_loss_per_false_association ( + ) const; + /*! + ensures + - returns the amount of loss experienced for associating two objects + together that shouldn't be associated. If you care more about avoiding + accidental associations than ensuring all possible associations are + identified then then you can increase this value. 
+ !*/ + + void set_loss_per_missed_association ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_missed_association() == loss + !*/ + + double get_loss_per_missed_association ( + ) const; + /*! + ensures + - returns the amount of loss experienced for failing to associate two + objects that are supposed to be associated. If you care more about + getting all the associations than avoiding accidentally associating + objects that shouldn't be associated then you can increase this value. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the structural SVM problem. + !*/ + + void set_c ( + double C + ); + /*! + requires + - C > 0 + ensures + - #get_c() = C + !*/ + + double get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter + that determines the trade-off between trying to fit the training + data (i.e. minimize the loss) or allowing more errors but hopefully + improving the generalization of the resulting assignment_function. + Larger values encourage exact fitting while smaller values of C may + encourage better generalization. + !*/ + + void set_forces_assignment ( + bool new_value + ); + /*! + ensures + - #forces_assignment() == new_value + !*/ + + bool forces_assignment( + ) const; + /*! + ensures + - returns the value of the forces_assignment() parameter for the + assignment_functions generated by this object. + !*/ + + bool forces_last_weight_to_1 ( + ) const; + /*! 
+ ensures + - returns true if this trainer has the constraint that the last weight in + the learned parameter vector must be 1. This is the weight corresponding + to the feature in the training vectors with the highest dimension. + - Forcing the last weight to 1 also disables the bias and therefore the + get_bias() field of the learned assignment_function will be 0 when + forces_last_weight_to_1() == true. + !*/ + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ); + /*! + ensures + - #forces_last_weight_to_1() == should_last_weight_be_1 + !*/ + + const assignment_function<feature_extractor> train ( + const std::vector<sample_type>& samples, + const std::vector<label_type>& labels + ) const; + /*! + requires + - is_assignment_problem(samples,labels) == true + - if (forces_assignment()) then + - is_forced_assignment_problem(samples,labels) == true + ensures + - Uses the structural_svm_assignment_problem to train an + assignment_function on the given samples/labels training pairs. + The idea is to learn to predict a label given an input sample. + - returns a function F with the following properties: + - F(new_sample) == A set of assignments indicating how the elements of + new_sample.first match up with the elements of new_sample.second. + - F.forces_assignment() == forces_assignment() + - F.get_feature_extractor() == get_feature_extractor() + - if (forces_last_weight_to_1()) then + - F.get_bias() == 0 + - F.get_weights()(F.get_weights().size()-1) == 1 + !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_ASSiGNMENT_TRAINER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_graph_labeling_trainer.h b/ml/dlib/dlib/svm/structural_graph_labeling_trainer.h new file mode 100644 index 000000000..4d55c772b --- /dev/null +++ b/ml/dlib/dlib/svm/structural_graph_labeling_trainer.h @@ -0,0 +1,282 @@ +// Copyright (C) 2012 Davis E. 
King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_Hh_ +#define DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_Hh_ + +#include "structural_graph_labeling_trainer_abstract.h" +#include "../algs.h" +#include "../optimization.h" +#include "structural_svm_graph_labeling_problem.h" +#include "../graph_cuts/graph_labeler.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type + > + class structural_graph_labeling_trainer + { + public: + typedef std::vector<bool> label_type; + typedef graph_labeler<vector_type> trained_function_type; + + structural_graph_labeling_trainer ( + ) + { + C = 10; + verbose = false; + eps = 0.1; + num_threads = 2; + max_cache_size = 5; + loss_pos = 1.0; + loss_neg = 1.0; + } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void structural_graph_labeling_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + double get_epsilon ( + ) const { return eps; } + + void set_max_cache_size ( + unsigned long max_size + ) + { + max_cache_size = max_size; + } + + unsigned long get_max_cache_size ( + ) const + { + return max_cache_size; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_graph_labeling_trainer::set_c()" + << "\n\t C_ must be greater than 0" + 
<< "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + double get_c ( + ) const + { + return C; + } + + + void set_loss_on_positive_class ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss >= 0, + "\t structural_graph_labeling_trainer::set_loss_on_positive_class()" + << "\n\t Invalid inputs were given to this function." + << "\n\t loss: " << loss + << "\n\t this: " << this ); + + loss_pos = loss; + } + + void set_loss_on_negative_class ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss >= 0, + "\t structural_graph_labeling_trainer::set_loss_on_negative_class()" + << "\n\t Invalid inputs were given to this function." + << "\n\t loss: " << loss + << "\n\t this: " << this ); + + loss_neg = loss; + } + + double get_loss_on_negative_class ( + ) const { return loss_neg; } + + double get_loss_on_positive_class ( + ) const { return loss_pos; } + + + template < + typename graph_type + > + const graph_labeler<vector_type> train ( + const dlib::array<graph_type>& samples, + const std::vector<label_type>& labels, + const std::vector<std::vector<double> >& losses + ) const + { +#ifdef ENABLE_ASSERTS + std::string reason_for_failure; + DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure) == true , + "\t void structural_graph_labeling_trainer::train()" + << "\n\t Invalid inputs were given to this function." + << "\n\t reason_for_failure: " << reason_for_failure + << "\n\t samples.size(): " << samples.size() + << "\n\t labels.size(): " << labels.size() + << "\n\t this: " << this ); + DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) && + all_values_are_nonnegative(losses) == true, + "\t void structural_graph_labeling_trainer::train()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t labels.size(): " << labels.size() + << "\n\t losses.size(): " << losses.size() + << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses) + << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses) + << "\n\t this: " << this ); +#endif + + + structural_svm_graph_labeling_problem<graph_type> prob(samples, labels, losses, num_threads); + + if (verbose) + prob.be_verbose(); + + prob.set_c(C); + prob.set_epsilon(eps); + prob.set_max_cache_size(max_cache_size); + if (prob.get_losses().size() == 0) + { + prob.set_loss_on_positive_class(loss_pos); + prob.set_loss_on_negative_class(loss_neg); + } + + matrix<double,0,1> w; + solver(prob, w, prob.get_num_edge_weights()); + + vector_type edge_weights; + vector_type node_weights; + populate_weights(w, edge_weights, node_weights, prob.get_num_edge_weights()); + return graph_labeler<vector_type>(edge_weights, node_weights); + } + + template < + typename graph_type + > + const graph_labeler<vector_type> train ( + const dlib::array<graph_type>& samples, + const std::vector<label_type>& labels + ) const + { + std::vector<std::vector<double> > losses; + return train(samples, labels, losses); + } + + private: + + template <typename T> + typename enable_if<is_matrix<T> >::type populate_weights ( + const matrix<double,0,1>& w, + T& edge_weights, + T& node_weights, + long split_idx + ) const + { + edge_weights = rowm(w,range(0, split_idx-1)); + node_weights = rowm(w,range(split_idx,w.size()-1)); + } + + template <typename T> + typename disable_if<is_matrix<T> >::type populate_weights ( + const matrix<double,0,1>& w, + T& edge_weights, + T& node_weights, + long split_idx + ) const + { + edge_weights.clear(); + node_weights.clear(); + for (long i = 0; i < split_idx; ++i) + { + if (w(i) != 0) + edge_weights.insert(edge_weights.end(), std::make_pair(i,w(i))); + } + for (long i = split_idx; i < w.size(); ++i) + { + if (w(i) != 0) + node_weights.insert(node_weights.end(), 
std::make_pair(i-split_idx,w(i))); + } + } + + + double C; + oca solver; + double eps; + bool verbose; + unsigned long num_threads; + unsigned long max_cache_size; + double loss_pos; + double loss_neg; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_graph_labeling_trainer_abstract.h b/ml/dlib/dlib/svm/structural_graph_labeling_trainer_abstract.h new file mode 100644 index 000000000..df88096a0 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_graph_labeling_trainer_abstract.h @@ -0,0 +1,265 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_ABSTRACT_Hh_ + +#include "../algs.h" +#include "../optimization.h" +#include "structural_svm_graph_labeling_problem_abstract.h" +#include "../graph_cuts/graph_labeler_abstract.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename vector_type + > + class structural_graph_labeling_trainer + { + /*! + REQUIREMENTS ON vector_type + - vector_type is a dlib::matrix capable of representing column + vectors or it is a sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to solve a graph labeling problem based + on a training dataset of example labeled graphs. The training procedure + produces a graph_labeler object which can be used to predict the labelings + of new graphs. + + Note that this is just a convenience wrapper around the + structural_svm_graph_labeling_problem to make it look + similar to all the other trainers in dlib. 
+ !*/ + + public: + typedef std::vector<bool> label_type; + typedef graph_labeler<vector_type> trained_function_type; + + structural_graph_labeling_trainer ( + ); + /*! + ensures + - #get_c() == 10 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #get_loss_on_positive_class() == 1.0 + - #get_loss_on_negative_class() == 1.0 + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to train. You can think of this epsilon value as saying "solve the + optimization problem until the average number of labeling mistakes per + example graph is within epsilon of its optimal value". + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - During training, this object basically runs the graph_labeler on each + training sample, over and over. To speed this up, it is possible to + cache the results of these invocations. This function returns the number + of cache elements per training sample kept in the cache. Note that a value + of 0 means caching is not used at all. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! 
+ ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the structural SVM problem. + !*/ + + void set_c ( + double C + ); + /*! + requires + - C > 0 + ensures + - #get_c() = C + !*/ + + double get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter + that determines the trade-off between trying to fit the training + data (i.e. minimize the loss) or allowing more errors but hopefully + improving the generalization of the resulting graph_labeler. Larger + values encourage exact fitting while smaller values of C may encourage + better generalization. + !*/ + + void set_loss_on_positive_class ( + double loss + ); + /*! + requires + - loss >= 0 + ensures + - #get_loss_on_positive_class() == loss + !*/ + + void set_loss_on_negative_class ( + double loss + ); + /*! + requires + - loss >= 0 + ensures + - #get_loss_on_negative_class() == loss + !*/ + + double get_loss_on_positive_class ( + ) const; + /*! + ensures + - returns the loss incurred when a graph node which is supposed to have + a label of true gets misclassified. This value controls how much we care + about correctly classifying nodes which should be labeled as true. Larger + loss values indicate that we care more strongly than smaller values. + !*/ + + double get_loss_on_negative_class ( + ) const; + /*! + ensures + - returns the loss incurred when a graph node which is supposed to have + a label of false gets misclassified. This value controls how much we care + about correctly classifying nodes which should be labeled as false. Larger + loss values indicate that we care more strongly than smaller values. 
+ !*/ + + template < + typename graph_type + > + const graph_labeler<vector_type> train ( + const dlib::array<graph_type>& samples, + const std::vector<label_type>& labels + ) const; + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + ensures + - Uses the structural_svm_graph_labeling_problem to train a graph_labeler + on the given samples/labels training pairs. The idea is to learn to + predict a label given an input sample. + - The values of get_loss_on_positive_class() and get_loss_on_negative_class() + are used to determine how to value mistakes on each node during training. + - returns a function F with the following properties: + - F(new_sample) == The predicted labels for the nodes in the graph + new_sample. + !*/ + + template < + typename graph_type + > + const graph_labeler<vector_type> train ( + const dlib::array<graph_type>& samples, + const std::vector<label_type>& labels, + const std::vector<std::vector<double> >& losses + ) const; + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + - if (losses.size() != 0) then + - sizes_match(labels, losses) == true + - all_values_are_nonnegative(losses) == true + ensures + - Uses the structural_svm_graph_labeling_problem to train a graph_labeler + on the given samples/labels training pairs. The idea is to learn to + predict a label given an input sample. + - returns a function F with the following properties: + - F(new_sample) == The predicted labels for the nodes in the graph + new_sample. + - if (losses.size() == 0) then + - The values of get_loss_on_positive_class() and get_loss_on_negative_class() + are used to determine how to value mistakes on each node during training. + - The losses argument is effectively ignored if its size is zero. + - else + - Each node in the training data has its own loss value defined by the + corresponding entry of losses. 
In particular, this means that the + node with label labels[i][j] incurs a loss of losses[i][j] if it is + incorrectly labeled. + - The get_loss_on_positive_class() and get_loss_on_negative_class() + parameters are ignored. Only losses is used in this case. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_GRAPH_LABELING_tRAINER_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_object_detection_trainer.h b/ml/dlib/dlib/svm/structural_object_detection_trainer.h new file mode 100644 index 000000000..bdf8c5b5c --- /dev/null +++ b/ml/dlib/dlib/svm/structural_object_detection_trainer.h @@ -0,0 +1,402 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_Hh_ +#define DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_Hh_ + +#include "structural_object_detection_trainer_abstract.h" +#include "../algs.h" +#include "../optimization.h" +#include "structural_svm_object_detection_problem.h" +#include "../image_processing/object_detector.h" +#include "../image_processing/box_overlap_testing.h" +#include "../image_processing/full_object_detection.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_scanner_type, + typename svm_struct_prob_type + > + void configure_nuclear_norm_regularizer ( + const image_scanner_type&, + svm_struct_prob_type& + ) + { + // does nothing by default. Specific scanner types overload this function to do + // whatever is appropriate. 
+ } + +// ---------------------------------------------------------------------------------------- + + template < + typename image_scanner_type + > + class structural_object_detection_trainer : noncopyable + { + + public: + typedef double scalar_type; + typedef default_memory_manager mem_manager_type; + typedef object_detector<image_scanner_type> trained_function_type; + + + explicit structural_object_detection_trainer ( + const image_scanner_type& scanner_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(scanner_.get_num_detection_templates() > 0, + "\t structural_object_detection_trainer::structural_object_detection_trainer(scanner_)" + << "\n\t You can't have zero detection templates" + << "\n\t this: " << this + ); + + C = 1; + verbose = false; + eps = 0.1; + num_threads = 2; + max_cache_size = 5; + match_eps = 0.5; + loss_per_missed_target = 1; + loss_per_false_alarm = 1; + + scanner.copy_configuration(scanner_); + + auto_overlap_tester = true; + } + + const image_scanner_type& get_scanner ( + ) const + { + return scanner; + } + + bool auto_set_overlap_tester ( + ) const + { + return auto_overlap_tester; + } + + void set_overlap_tester ( + const test_box_overlap& tester + ) + { + overlap_tester = tester; + auto_overlap_tester = false; + } + + test_box_overlap get_overlap_tester ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(auto_set_overlap_tester() == false, + "\t test_box_overlap structural_object_detection_trainer::get_overlap_tester()" + << "\n\t You can't call this function if the overlap tester is generated dynamically." 
+ << "\n\t this: " << this + ); + + return overlap_tester; + } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void structural_object_detection_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + scalar_type get_epsilon ( + ) const { return eps; } + + void set_max_cache_size ( + unsigned long max_size + ) + { + max_cache_size = max_size; + } + + unsigned long get_max_cache_size ( + ) const + { + return max_cache_size; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + void set_c ( + scalar_type C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_object_detection_trainer::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + scalar_type get_c ( + ) const + { + return C; + } + + void set_match_eps ( + double eps + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < eps && eps < 1, + "\t void structural_object_detection_trainer::set_match_eps(eps)" + << "\n\t Invalid inputs were given to this function " + << "\n\t eps: " << eps + << "\n\t this: " << this + ); + + match_eps = eps; + } + + double get_match_eps ( + ) const + { + return match_eps; + } + + double get_loss_per_missed_target ( + ) const + { + return loss_per_missed_target; + } + + void set_loss_per_missed_target ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void 
structural_object_detection_trainer::set_loss_per_missed_target(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_missed_target = loss; + } + + double get_loss_per_false_alarm ( + ) const + { + return loss_per_false_alarm; + } + + void set_loss_per_false_alarm ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_object_detection_trainer::set_loss_per_false_alarm(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_false_alarm = loss; + } + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections + ) const + { + std::vector<std::vector<rectangle> > empty_ignore(images.size()); + return train_impl(images, truth_object_detections, empty_ignore, test_box_overlap()); + } + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester = test_box_overlap() + ) const + { + return train_impl(images, truth_object_detections, ignore, ignore_overlap_tester); + } + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_object_detections + ) const + { + std::vector<std::vector<rectangle> > empty_ignore(images.size()); + return train(images, truth_object_detections, empty_ignore, test_box_overlap()); + } + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<rectangle> >& 
truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester = test_box_overlap() + ) const + { + std::vector<std::vector<full_object_detection> > truth_dets(truth_object_detections.size()); + for (unsigned long i = 0; i < truth_object_detections.size(); ++i) + { + for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) + { + truth_dets[i].push_back(full_object_detection(truth_object_detections[i][j])); + } + } + + return train_impl(images, truth_dets, ignore, ignore_overlap_tester); + } + + private: + + template < + typename image_array_type + > + const trained_function_type train_impl ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester + ) const + { +#ifdef ENABLE_ASSERTS + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(images,truth_object_detections) == true && images.size() == ignore.size(), + "\t trained_function_type structural_object_detection_trainer::train()" + << "\n\t invalid inputs were given to this function" + << "\n\t images.size(): " << images.size() + << "\n\t ignore.size(): " << ignore.size() + << "\n\t truth_object_detections.size(): " << truth_object_detections.size() + << "\n\t is_learning_problem(images,truth_object_detections): " << is_learning_problem(images,truth_object_detections) + ); + for (unsigned long i = 0; i < truth_object_detections.size(); ++i) + { + for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) + { + DLIB_ASSERT(truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() && + all_parts_in_rect(truth_object_detections[i][j]) == true, + "\t trained_function_type structural_object_detection_trainer::train()" + << "\n\t invalid inputs were given to this function" + << "\n\t 
truth_object_detections["<<i<<"]["<<j<<"].num_parts(): " << + truth_object_detections[i][j].num_parts() + << "\n\t get_scanner().get_num_movable_components_per_detection_template(): " << + get_scanner().get_num_movable_components_per_detection_template() + << "\n\t all_parts_in_rect(truth_object_detections["<<i<<"]["<<j<<"]): " << all_parts_in_rect(truth_object_detections[i][j]) + ); + } + } +#endif + + structural_svm_object_detection_problem<image_scanner_type,image_array_type > + svm_prob(scanner, overlap_tester, auto_overlap_tester, images, + truth_object_detections, ignore, ignore_overlap_tester, num_threads); + + if (verbose) + svm_prob.be_verbose(); + + svm_prob.set_c(C); + svm_prob.set_epsilon(eps); + svm_prob.set_max_cache_size(max_cache_size); + svm_prob.set_match_eps(match_eps); + svm_prob.set_loss_per_missed_target(loss_per_missed_target); + svm_prob.set_loss_per_false_alarm(loss_per_false_alarm); + configure_nuclear_norm_regularizer(scanner, svm_prob); + matrix<double,0,1> w; + + // Run the optimizer to find the optimal w. + solver(svm_prob,w); + + // report the results of the training. 
+ return object_detector<image_scanner_type>(scanner, svm_prob.get_overlap_tester(), w); + } + + image_scanner_type scanner; + test_box_overlap overlap_tester; + + double C; + oca solver; + double eps; + double match_eps; + bool verbose; + unsigned long num_threads; + unsigned long max_cache_size; + double loss_per_missed_target; + double loss_per_false_alarm; + bool auto_overlap_tester; + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h new file mode 100644 index 000000000..2dd799874 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_object_detection_trainer_abstract.h @@ -0,0 +1,390 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ +#ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ + +#include "structural_svm_object_detection_problem_abstract.h" +#include "../image_processing/object_detector_abstract.h" +#include "../image_processing/box_overlap_testing_abstract.h" +#include "../image_processing/full_object_detection_abstract.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_scanner_type + > + class structural_object_detection_trainer : noncopyable + { + /*! 
+ REQUIREMENTS ON image_scanner_type + image_scanner_type must be an implementation of + dlib/image_processing/scan_fhog_pyramid_abstract.h or + dlib/image_processing/scan_image_custom_abstract.h or + dlib/image_processing/scan_image_pyramid_abstract.h or + dlib/image_processing/scan_image_boxes_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to detect objects in images based on a + set of labeled images. The training procedure produces an object_detector + which can be used to predict the locations of objects in new images. + + Note that this is just a convenience wrapper around the structural_svm_object_detection_problem + to make it look similar to all the other trainers in dlib. + !*/ + + public: + typedef double scalar_type; + typedef default_memory_manager mem_manager_type; + typedef object_detector<image_scanner_type> trained_function_type; + + + explicit structural_object_detection_trainer ( + const image_scanner_type& scanner + ); + /*! + requires + - scanner.get_num_detection_templates() > 0 + ensures + - #get_c() == 1 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #get_match_eps() == 0.5 + - #get_loss_per_missed_target() == 1 + - #get_loss_per_false_alarm() == 1 + - This object will attempt to learn a model for the given + scanner object when train() is called. + - #get_scanner() == scanner + (note that only the "configuration" of scanner is copied. + I.e. the copy is done using copy_configuration()) + - #auto_set_overlap_tester() == true + !*/ + + const image_scanner_type& get_scanner ( + ) const; + /*! + ensures + - returns the image scanner used by this object. + !*/ + + bool auto_set_overlap_tester ( + ) const; + /*! + ensures + - if (this object will automatically determine an appropriate + state for the overlap tester used for non-max suppression.) 
then + - returns true + - In this case, it is determined using the find_tight_overlap_tester() + routine based on the truth_object_detections given to the + structural_object_detection_trainer::train() method. + - else + - returns false + !*/ + + void set_overlap_tester ( + const test_box_overlap& tester + ); + /*! + ensures + - #get_overlap_tester() == tester + - #auto_set_overlap_tester() == false + !*/ + + test_box_overlap get_overlap_tester ( + ) const; + /*! + requires + - auto_set_overlap_tester() == false + ensures + - returns the overlap tester object which will be used to perform non-max suppression. + In particular, this function returns the overlap tester which will populate the + object_detector returned by train(). + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to train. You can think of this epsilon value as saying "solve the + optimization problem until the average loss per sample is within epsilon + of its optimal value". + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - During training, this object basically runs the object detector on + each image, over and over. To speed this up, it is possible to cache + the results of these detector invocations. 
This function returns the + number of cache elements per training sample kept in the cache. Note + that a value of 0 means caching is not used at all. Note also that + each cache element takes up about sizeof(double)*scanner.get_num_dimensions() + memory (where scanner is the scanner given to this object's constructor). + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the structural SVM problem. + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c() = C + !*/ + + const scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter + that determines the trade-off between trying to fit the training + data (i.e. minimize the loss) or allowing more errors but hopefully + improving the generalization of the resulting detector. Larger + values encourage exact fitting while smaller values of C may encourage + better generalization. + !*/ + + void set_match_eps ( + double eps + ); + /*! + requires + - 0 < eps < 1 + ensures + - #get_match_eps() == eps + !*/ + + double get_match_eps ( + ) const; + /*! + ensures + - returns the amount of alignment necessary for a detection to be considered + as matching with a ground truth rectangle. If it doesn't match then + it is considered to be a false alarm. To define this precisely, let + A and B be two rectangles, then A and B match if and only if: + A.intersect(B).area()/(A+B).area() > get_match_eps() + !*/ + + double get_loss_per_missed_target ( + ) const; + /*! 
+ ensures + - returns the amount of loss experienced for failing to detect one of the + targets. If you care more about finding targets than having a low false + alarm rate then you can increase this value. + !*/ + + void set_loss_per_missed_target ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_missed_target() == loss + !*/ + + double get_loss_per_false_alarm ( + ) const; + /*! + ensures + - returns the amount of loss experienced for emitting a false alarm detection. + Or in other words, the loss for generating a detection that doesn't correspond + to one of the truth rectangles. If you care more about having a low false + alarm rate than finding all the targets then you can increase this value. + !*/ + + void set_loss_per_false_alarm ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_false_alarm() == loss + !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - for all valid i, j: + - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() + - all_parts_in_rect(truth_object_detections[i][j]) == true + ensures + - Uses the structural_svm_object_detection_problem to train an object_detector + on the given images and truth_object_detections. + - returns a function F with the following properties: + - F(new_image) == A prediction of what objects are present in new_image. This + is a set of rectangles indicating their positions. 
+ !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_object_detections + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - get_scanner().get_num_movable_components_per_detection_template() == 0 + ensures + - This function is identical to the above train(), except that it converts + each element of truth_object_detections into a full_object_detection by + passing it to full_object_detection's constructor taking only a rectangle. + Therefore, this version of train() is a convenience function for for the + case where you don't have any movable components of the detection templates. + !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester = test_box_overlap() + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - ignore.size() == images.size() + - for all valid i, j: + - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() + - all_parts_in_rect(truth_object_detections[i][j]) == true + ensures + - Uses the structural_svm_object_detection_problem to train an object_detector + on the given images and truth_object_detections. 
+ - for all valid i: + - Within images[i] any detections that match against a rectangle in + ignore[i], according to ignore_overlap_tester, are ignored. That is, + the optimizer doesn't care if the detector outputs a detection that + matches any of the ignore rectangles or if it fails to output a + detection for an ignore rectangle. Therefore, if there are objects + in your dataset that you are unsure if you want to detect or otherwise + don't care if the detector gets or doesn't then you can mark them + with ignore rectangles and the optimizer will simply ignore them. + - returns a function F with the following properties: + - F(new_image) == A prediction of what objects are present in new_image. This + is a set of rectangles indicating their positions. + !*/ + + template < + typename image_array_type + > + const trained_function_type train ( + const image_array_type& images, + const std::vector<std::vector<rectangle> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester = test_box_overlap() + ) const; + /*! + requires + - is_learning_problem(images, truth_object_detections) == true + - ignore.size() == images.size() + - it must be valid to pass images[0] into the image_scanner_type::load() method. + (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) + - get_scanner().get_num_movable_components_per_detection_template() == 0 + ensures + - This function is identical to the above train(), except that it converts + each element of truth_object_detections into a full_object_detection by + passing it to full_object_detection's constructor taking only a rectangle. + Therefore, this version of train() is a convenience function for for the + case where you don't have any movable components of the detection templates. 
+ !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ + + diff --git a/ml/dlib/dlib/svm/structural_sequence_labeling_trainer.h b/ml/dlib/dlib/svm/structural_sequence_labeling_trainer.h new file mode 100644 index 000000000..9b61fd6c2 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_sequence_labeling_trainer.h @@ -0,0 +1,271 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_Hh_ +#define DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_Hh_ + +#include "structural_sequence_labeling_trainer_abstract.h" +#include "../algs.h" +#include "../optimization.h" +#include "structural_svm_sequence_labeling_problem.h" +#include "num_nonnegative_weights.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_sequence_labeling_trainer + { + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<unsigned long> labeled_sequence_type; + + typedef sequence_labeler<feature_extractor> trained_function_type; + + explicit structural_sequence_labeling_trainer ( + const feature_extractor& fe_ + ) : fe(fe_) + { + set_defaults(); + } + + structural_sequence_labeling_trainer ( + ) + { + set_defaults(); + } + + const feature_extractor& get_feature_extractor ( + ) const { return fe; } + + unsigned long num_labels ( + ) const { return fe.num_labels(); } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void 
structural_sequence_labeling_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + double get_epsilon ( + ) const { return eps; } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void set_max_cache_size ( + unsigned long max_size + ) + { + max_cache_size = max_size; + } + + unsigned long get_max_cache_size ( + ) const + { + return max_cache_size; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_sequence_labeling_trainer::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + double get_c ( + ) const + { + return C; + } + + double get_loss ( + unsigned long label + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(label < num_labels(), + "\t void structural_sequence_labeling_trainer::get_loss()" + << "\n\t invalid inputs were given to this function" + << "\n\t label: " << label + << "\n\t num_labels(): " << num_labels() + << "\n\t this: " << this + ); + + return loss_values[label]; + } + + void set_loss ( + unsigned long label, + double value + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(label < num_labels() && value >= 0, + "\t void structural_sequence_labeling_trainer::set_loss()" + << "\n\t invalid inputs were given to this function" + << "\n\t label: " << label + << "\n\t num_labels(): " << num_labels() + << "\n\t value: " << value + << "\n\t this: " << this + ); + + loss_values[label] = value; + } + + + const 
sequence_labeler<feature_extractor> train( + const std::vector<sample_sequence_type>& x, + const std::vector<labeled_sequence_type>& y + ) const + { + + // make sure requires clause is not broken + DLIB_ASSERT(is_sequence_labeling_problem(x,y) == true && + contains_invalid_labeling(get_feature_extractor(), x, y) == false, + "\t sequence_labeler structural_sequence_labeling_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.size(): " << x.size() + << "\n\t is_sequence_labeling_problem(x,y): " << is_sequence_labeling_problem(x,y) + << "\n\t contains_invalid_labeling(get_feature_extractor(),x,y): " << contains_invalid_labeling(get_feature_extractor(),x,y) + << "\n\t this: " << this + ); + +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < y.size(); ++i) + { + for (unsigned long j = 0; j < y[i].size(); ++j) + { + // make sure requires clause is not broken + DLIB_ASSERT(y[i][j] < num_labels(), + "\t sequence_labeler structural_sequence_labeling_trainer::train(x,y)" + << "\n\t The given labels in y are invalid." 
+ << "\n\t y[i][j]: " << y[i][j] + << "\n\t num_labels(): " << num_labels() + << "\n\t i: " << i + << "\n\t j: " << j + << "\n\t this: " << this + ); + } + } +#endif + + + + + structural_svm_sequence_labeling_problem<feature_extractor> prob(x, y, fe, num_threads); + matrix<double,0,1> weights; + if (verbose) + prob.be_verbose(); + + prob.set_epsilon(eps); + prob.set_max_iterations(max_iterations); + prob.set_c(C); + prob.set_max_cache_size(max_cache_size); + for (unsigned long i = 0; i < loss_values.size(); ++i) + prob.set_loss(i,loss_values[i]); + + solver(prob, weights, num_nonnegative_weights(fe)); + + return sequence_labeler<feature_extractor>(weights,fe); + } + + private: + + double C; + oca solver; + double eps; + unsigned long max_iterations; + bool verbose; + unsigned long num_threads; + unsigned long max_cache_size; + std::vector<double> loss_values; + + void set_defaults () + { + C = 100; + verbose = false; + eps = 0.1; + max_iterations = 10000; + num_threads = 2; + max_cache_size = 5; + loss_values.assign(num_labels(), 1); + } + + feature_extractor fe; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_Hh_ + + + diff --git a/ml/dlib/dlib/svm/structural_sequence_labeling_trainer_abstract.h b/ml/dlib/dlib/svm/structural_sequence_labeling_trainer_abstract.h new file mode 100644 index 000000000..43e5f5131 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_sequence_labeling_trainer_abstract.h @@ -0,0 +1,266 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_Hh_ + +#include "../algs.h" +#include "../optimization.h" +#include "structural_svm_sequence_labeling_problem_abstract.h" +#include "sequence_labeler_abstract.h" + + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_sequence_labeling_trainer + { + /*! + REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to do sequence labeling based + on a set of training data. The training procedure produces a + sequence_labeler object which can be used to predict the labels of + new data sequences. + + Note that this is just a convenience wrapper around the + structural_svm_sequence_labeling_problem to make it look + similar to all the other trainers in dlib. + !*/ + + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<unsigned long> labeled_sequence_type; + typedef sequence_labeler<feature_extractor> trained_function_type; + + structural_sequence_labeling_trainer ( + ); + /*! + ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_max_iterations() == 10000 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #get_feature_extractor() == a default initialized feature_extractor + !*/ + + explicit structural_sequence_labeling_trainer ( + const feature_extractor& fe + ); + /*! 
+ ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_max_iterations() == 10000 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #get_feature_extractor() == fe + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object + !*/ + + unsigned long num_labels ( + ) const; + /*! + ensures + - returns get_feature_extractor().num_labels() + (i.e. returns the number of possible output labels for each + element of a sequence) + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to train. You can think of this epsilon value as saying "solve the + optimization problem until the average number of labeling mistakes per + training sample is within epsilon of its optimal value". + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! 
+ ensures + - During training, this object basically runs the sequence_labeler on + each training sample, over and over. To speed this up, it is possible to + cache the results of these labeler invocations. This function returns the + number of cache elements per training sample kept in the cache. Note + that a value of 0 means caching is not used at all. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the structural SVM problem. + !*/ + + void set_c ( + double C + ); + /*! + requires + - C > 0 + ensures + - #get_c() = C + !*/ + + double get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter + that determines the trade-off between trying to fit the training + data (i.e. minimize the loss) or allowing more errors but hopefully + improving the generalization of the resulting sequence labeler. Larger + values encourage exact fitting while smaller values of C may encourage + better generalization. + !*/ + + double get_loss ( + unsigned long label + ) const; + /*! + requires + - label < num_labels() + ensures + - returns the loss incurred when a sequence element with the given + label is misclassified. This value controls how much we care about + correctly classifying this type of label. Larger loss values indicate + that we care more strongly than smaller values. + !*/ + + void set_loss ( + unsigned long label, + double value + ); + /*! 
+ requires + - label < num_labels() + - value >= 0 + ensures + - #get_loss(label) == value + !*/ + + const sequence_labeler<feature_extractor> train( + const std::vector<sample_sequence_type>& x, + const std::vector<labeled_sequence_type>& y + ) const; + /*! + requires + - is_sequence_labeling_problem(x, y) == true + - contains_invalid_labeling(get_feature_extractor(), x, y) == false + - for all valid i and j: y[i][j] < num_labels() + ensures + - Uses the structural_svm_sequence_labeling_problem to train a + sequence_labeler on the given x/y training pairs. The idea is + to learn to predict a y given an input x. + - returns a function F with the following properties: + - F(new_x) == A sequence of predicted labels for the elements of new_x. + - F(new_x).size() == new_x.size() + - for all valid i: + - F(new_x)[i] == the predicted label of new_x[i] + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SEQUENCE_LABELING_TRAiNER_ABSTRACT_Hh_ + + + + diff --git a/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer.h b/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer.h new file mode 100644 index 000000000..2e0214008 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer.h @@ -0,0 +1,281 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_Hh_ +#define DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_Hh_ + +#include "structural_sequence_segmentation_trainer_abstract.h" +#include "structural_sequence_labeling_trainer.h" +#include "sequence_segmenter.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_sequence_segmentation_trainer + { + public: + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; + + typedef sequence_segmenter<feature_extractor> trained_function_type; + + explicit structural_sequence_segmentation_trainer ( + const feature_extractor& fe_ + ) : trainer(impl_ss::feature_extractor<feature_extractor>(fe_)) + { + loss_per_missed_segment = 1; + loss_per_false_alarm = 1; + } + + structural_sequence_segmentation_trainer ( + ) + { + loss_per_missed_segment = 1; + loss_per_false_alarm = 1; + } + + const feature_extractor& get_feature_extractor ( + ) const { return trainer.get_feature_extractor().fe; } + + void set_num_threads ( + unsigned long num + ) + { + trainer.set_num_threads(num); + } + + unsigned long get_num_threads ( + ) const + { + return trainer.get_num_threads(); + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void structural_sequence_segmentation_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + trainer.set_epsilon(eps_); + } + + double get_epsilon ( + ) const { return trainer.get_epsilon(); } + + unsigned long get_max_iterations ( + ) const { return trainer.get_max_iterations(); } + + void set_max_iterations ( + unsigned long max_iter + ) + { + trainer.set_max_iterations(max_iter); + } + + void set_max_cache_size ( + unsigned long max_size + 
) + { + trainer.set_max_cache_size(max_size); + } + + unsigned long get_max_cache_size ( + ) const + { + return trainer.get_max_cache_size(); + } + + void be_verbose ( + ) + { + trainer.be_verbose(); + } + + void be_quiet ( + ) + { + trainer.be_quiet(); + } + + void set_oca ( + const oca& item + ) + { + trainer.set_oca(item); + } + + const oca get_oca ( + ) const + { + return trainer.get_oca(); + } + + void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_sequence_segmentation_trainer::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + trainer.set_c(C_); + } + + double get_c ( + ) const + { + return trainer.get_c(); + } + + void set_loss_per_missed_segment ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss >= 0, + "\t void structural_sequence_segmentation_trainer::set_loss_per_missed_segment(loss)" + << "\n\t invalid inputs were given to this function" + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_missed_segment = loss; + + if (feature_extractor::use_BIO_model) + { + trainer.set_loss(impl_ss::BEGIN, loss_per_missed_segment); + trainer.set_loss(impl_ss::INSIDE, loss_per_missed_segment); + } + else + { + trainer.set_loss(impl_ss::BEGIN, loss_per_missed_segment); + trainer.set_loss(impl_ss::INSIDE, loss_per_missed_segment); + trainer.set_loss(impl_ss::LAST, loss_per_missed_segment); + trainer.set_loss(impl_ss::UNIT, loss_per_missed_segment); + } + } + + double get_loss_per_missed_segment ( + ) const + { + return loss_per_missed_segment; + } + + void set_loss_per_false_alarm ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss >= 0, + "\t void structural_sequence_segmentation_trainer::set_loss_per_false_alarm(loss)" + << "\n\t invalid inputs were given to this function" + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_false_alarm 
= loss; + + trainer.set_loss(impl_ss::OUTSIDE, loss_per_false_alarm); + } + + double get_loss_per_false_alarm ( + ) const + { + return loss_per_false_alarm; + } + + const sequence_segmenter<feature_extractor> train( + const std::vector<sample_sequence_type>& x, + const std::vector<segmented_sequence_type>& y + ) const + { + + // make sure requires clause is not broken + DLIB_ASSERT(is_sequence_segmentation_problem(x,y) == true, + "\t sequence_segmenter structural_sequence_segmentation_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.size(): " << x.size() + << "\n\t is_sequence_segmentation_problem(x,y): " << is_sequence_segmentation_problem(x,y) + << "\n\t this: " << this + ); + + std::vector<std::vector<unsigned long> > labels(y.size()); + if (feature_extractor::use_BIO_model) + { + // convert y into tagged BIO labels + for (unsigned long i = 0; i < labels.size(); ++i) + { + labels[i].resize(x[i].size(), impl_ss::OUTSIDE); + for (unsigned long j = 0; j < y[i].size(); ++j) + { + const unsigned long begin = y[i][j].first; + const unsigned long end = y[i][j].second; + if (begin != end) + { + labels[i][begin] = impl_ss::BEGIN; + for (unsigned long k = begin+1; k < end; ++k) + labels[i][k] = impl_ss::INSIDE; + } + } + } + } + else + { + // convert y into tagged BILOU labels + for (unsigned long i = 0; i < labels.size(); ++i) + { + labels[i].resize(x[i].size(), impl_ss::OUTSIDE); + for (unsigned long j = 0; j < y[i].size(); ++j) + { + const unsigned long begin = y[i][j].first; + const unsigned long end = y[i][j].second; + if (begin != end) + { + if (begin+1==end) + { + labels[i][begin] = impl_ss::UNIT; + } + else + { + labels[i][begin] = impl_ss::BEGIN; + for (unsigned long k = begin+1; k+1 < end; ++k) + labels[i][k] = impl_ss::INSIDE; + labels[i][end-1] = impl_ss::LAST; + } + } + } + } + } + + sequence_labeler<impl_ss::feature_extractor<feature_extractor> > temp; + temp = trainer.train(x, labels); + return 
sequence_segmenter<feature_extractor>(temp.get_weights(), trainer.get_feature_extractor().fe); + } + + private: + + structural_sequence_labeling_trainer<impl_ss::feature_extractor<feature_extractor> > trainer; + double loss_per_missed_segment; + double loss_per_false_alarm; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer_abstract.h b/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer_abstract.h new file mode 100644 index 000000000..bcd927ca6 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_sequence_segmentation_trainer_abstract.h @@ -0,0 +1,264 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_ABSTRACT_Hh_ + +#include "sequence_segmenter_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_sequence_segmentation_trainer + { + /*! + REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor defined in dlib/svm/sequence_segmenter_abstract.h. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to do sequence segmentation based on a + set of training data. The training procedure produces a sequence_segmenter + object which can be used to identify the sub-segments of new data + sequences. + + This object internally uses the structural_sequence_labeling_trainer to + solve the learning problem. 
+ !*/ + + public: + + typedef typename feature_extractor::sequence_type sample_sequence_type; + typedef std::vector<std::pair<unsigned long, unsigned long> > segmented_sequence_type; + + typedef sequence_segmenter<feature_extractor> trained_function_type; + + structural_sequence_segmentation_trainer ( + ); + /*! + ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_max_iterations() == 10000 + - #get_num_threads() == 2 + - #get_max_cache_size() == 40 + - #get_feature_extractor() == a default initialized feature_extractor + - #get_loss_per_missed_segment() == 1 + - #get_loss_per_false_alarm() == 1 + !*/ + + explicit structural_sequence_segmentation_trainer ( + const feature_extractor& fe + ); + /*! + ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.1 + - #get_max_iterations() == 10000 + - #get_num_threads() == 2 + - #get_max_cache_size() == 40 + - #get_feature_extractor() == fe + - #get_loss_per_missed_segment() == 1 + - #get_loss_per_false_alarm() == 1 + !*/ + + const feature_extractor& get_feature_extractor ( + ) const; + /*! + ensures + - returns the feature extractor used by this object + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + double eps_ + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to train. 
You can think of this epsilon value as saying "solve the + optimization problem until the average number of segmentation mistakes + per training sample is within epsilon of its optimal value". + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - During training, this object basically runs the sequence_segmenter on + each training sample, over and over. To speed this up, it is possible to + cache the results of these segmenter invocations. This function returns + the number of cache elements per training sample kept in the cache. Note + that a value of 0 means caching is not used at all. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a user can + observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the structural SVM problem. + !*/ + + void set_c ( + double C + ); + /*! + requires + - C > 0 + ensures + - #get_c() = C + !*/ + + double get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter that + determines the trade-off between trying to fit the training data (i.e. + minimize the loss) or allowing more errors but hopefully improving the + generalization of the resulting sequence labeler. 
Larger values + encourage exact fitting while smaller values of C may encourage better + generalization. + !*/ + + void set_loss_per_missed_segment ( + double loss + ); + /*! + requires + - loss >= 0 + ensures + - #get_loss_per_missed_segment() == loss + !*/ + + double get_loss_per_missed_segment ( + ) const; + /*! + ensures + - returns the amount of loss incurred for failing to detect a segment. The + larger the loss the more important it is to detect all the segments. + !*/ + + + void set_loss_per_false_alarm ( + double loss + ); + /*! + requires + - loss >= 0 + ensures + - #get_loss_per_false_alarm() == loss + !*/ + + double get_loss_per_false_alarm ( + ) const; + /*! + ensures + - returns the amount of loss incurred for outputting a false detection. The + larger the loss the more important it is to avoid outputting false + detections. + !*/ + + const sequence_segmenter<feature_extractor> train( + const std::vector<sample_sequence_type>& x, + const std::vector<segmented_sequence_type>& y + ) const; + /*! + requires + - is_sequence_segmentation_problem(x, y) == true + ensures + - Uses the given training data to learn to do sequence segmentation. That + is, this function will try to find a sequence_segmenter capable of + predicting y[i] when given x[i] as input. Moreover, it should also be + capable of predicting the segmentation of new input sequences. Or in + other words, the learned sequence_segmenter should also generalize to new + data outside the training dataset. + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SEQUENCE_sEGMENTATION_TRAINER_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_svm_assignment_problem.h b/ml/dlib/dlib/svm/structural_svm_assignment_problem.h new file mode 100644 index 000000000..963af1631 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_assignment_problem.h @@ -0,0 +1,288 @@ +// Copyright (C) 2011 Davis E. 
King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_Hh_
#define DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_Hh_


#include "structural_svm_assignment_problem_abstract.h"
#include "../matrix.h"
#include <vector>
#include <iterator>
#include "structural_svm_problem_threaded.h"

// ----------------------------------------------------------------------------------------

namespace dlib
{
    // Metafunction: grows a statically sized column matrix type by n rows.  Used below to
    // make room for one extra feature (the bias/num-assignments slot) in the PSI vector.
    template <long n, typename T>
    struct column_matrix_static_resize
    {
        typedef T type;
    };

    template <long n, typename T, long NR, long NC, typename MM, typename L>
    struct column_matrix_static_resize<n, matrix<T,NR,NC,MM,L> >
    {
        typedef matrix<T,NR+n,NC,MM,L> type;
    };

    // Dynamically sized matrices (NR==0) stay dynamically sized.
    template <long n, typename T, long NC, typename MM, typename L>
    struct column_matrix_static_resize<n, matrix<T,0,NC,MM,L> >
    {
        typedef matrix<T,0,NC,MM,L> type;
    };

    // The joint feature vector is the extractor's feature vector plus one extra element.
    template <typename T>
    struct add_one_to_static_feat_size
    {
        typedef typename column_matrix_static_resize<1,typename T::feature_vector_type>::type type;
    };

// ----------------------------------------------------------------------------------------

    // Structural SVM formulation of the assignment learning problem: each sample is a
    // (lhs elements, rhs elements) pair and each label gives, for every lhs element, the
    // index of its rhs match (or -1 for no match).
    template <
        typename feature_extractor
        >
    class structural_svm_assignment_problem : noncopyable,
        public structural_svm_problem_threaded<matrix<double,0,1>, typename add_one_to_static_feat_size<feature_extractor>::type >
    {
    public:
        typedef matrix<double,0,1> matrix_type;
        typedef typename add_one_to_static_feat_size<feature_extractor>::type feature_vector_type;

        typedef typename feature_extractor::lhs_element lhs_element;
        typedef typename feature_extractor::rhs_element rhs_element;


        typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type;

        typedef std::vector<long> label_type;

        structural_svm_assignment_problem(
            const std::vector<sample_type>& samples_,
            const std::vector<label_type>& labels_,
            const feature_extractor& fe_,
            bool force_assignment_,
            unsigned long num_threads,
            const double loss_per_false_association_,
            const double loss_per_missed_association_
        ) :
            structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads),
            samples(samples_),
            labels(labels_),
            fe(fe_),
            force_assignment(force_assignment_),
            loss_per_false_association(loss_per_false_association_),
            loss_per_missed_association(loss_per_missed_association_)
        {
            // make sure requires clause is not broken
#ifdef ENABLE_ASSERTS
            DLIB_ASSERT(loss_per_false_association > 0 && loss_per_missed_association > 0,
                "\t structural_svm_assignment_problem::structural_svm_assignment_problem()"
                << "\n\t invalid inputs were given to this function"
                << "\n\t loss_per_false_association: " << loss_per_false_association
                << "\n\t loss_per_missed_association: " << loss_per_missed_association
                << "\n\t this: " << this
                );
            if (force_assignment)
            {
                DLIB_ASSERT(is_forced_assignment_problem(samples, labels),
                    "\t structural_svm_assignment_problem::structural_svm_assignment_problem()"
                    << "\n\t invalid inputs were given to this function"
                    << "\n\t is_forced_assignment_problem(samples,labels): " << is_forced_assignment_problem(samples,labels)
                    << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels)
                    << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels)
                    << "\n\t this: " << this
                    );
            }
            else
            {
                DLIB_ASSERT(is_assignment_problem(samples, labels),
                    "\t structural_svm_assignment_problem::structural_svm_assignment_problem()"
                    << "\n\t invalid inputs were given to this function"
                    << "\n\t is_assignment_problem(samples,labels): " << is_assignment_problem(samples,labels)
                    << "\n\t is_learning_problem(samples,labels): " << is_learning_problem(samples,labels)
                    << "\n\t this: " << this
                    );
            }
#endif

        }

    private:
        virtual long get_num_dimensions (
        ) const
        {
            return fe.num_features()+1; // +1 for the bias term
        }

        virtual long get_num_samples (
        ) const
        {
            return samples.size();
        }

        // Dense PSI: sum of per-association feature vectors, with the last element
        // counting the number of associations (pairs with the bias weight).
        template <typename psi_type>
        typename enable_if<is_matrix<psi_type> >::type get_joint_feature_vector (
            const sample_type& sample,
            const label_type& label,
            psi_type& psi
        ) const
        {
            typename feature_extractor::feature_vector_type feats;
            psi.set_size(get_num_dimensions());
            psi = 0;
            for (unsigned long i = 0; i < sample.first.size(); ++i)
            {
                if (label[i] != -1)
                {
                    fe.get_features(sample.first[i], sample.second[label[i]], feats);
                    set_rowm(psi,range(0,feats.size()-1)) += feats;
                    psi(get_num_dimensions()-1) += 1;
                }
            }
        }

        template <typename T>
        void append_to_sparse_vect (
            T& psi,
            const T& vect
        ) const
        {
            std::copy(vect.begin(), vect.end(), std::back_inserter(psi));
        }

        // Sparse PSI: same contract as the dense overload but builds an index/value pair
        // list instead of a dense column vector.
        template <typename psi_type>
        typename disable_if<is_matrix<psi_type> >::type get_joint_feature_vector (
            const sample_type& sample,
            const label_type& label,
            psi_type& psi
        ) const
        {
            psi.clear();
            feature_vector_type feats;
            int num_assignments = 0;
            for (unsigned long i = 0; i < sample.first.size(); ++i)
            {
                if (label[i] != -1)
                {
                    fe.get_features(sample.first[i], sample.second[label[i]], feats);
                    append_to_sparse_vect(psi, feats);
                    ++num_assignments;
                }
            }
            psi.push_back(std::make_pair(get_num_dimensions()-1,num_assignments));
        }

        virtual void get_truth_joint_feature_vector (
            long idx,
            feature_vector_type& psi
        ) const
        {
            get_joint_feature_vector(samples[idx], labels[idx], psi);
        }

        // Finds the loss-augmented best assignment for sample idx by solving a
        // max-cost matching over a square cost matrix (padded so every lhs element can
        // also be left unassigned when force_assignment is false).
        virtual void separation_oracle (
            const long idx,
            const matrix_type& current_solution,
            double& loss,
            feature_vector_type& psi
        ) const
        {
            matrix<double> cost;
            unsigned long size;
            if (force_assignment)
            {
                unsigned long lhs_size = samples[idx].first.size();
                unsigned long rhs_size = samples[idx].second.size();
                size = std::max(lhs_size, rhs_size);
            }
            else
            {
                // extra columns represent the "no association" option for each lhs element.
                unsigned long rhs_size = samples[idx].second.size() + samples[idx].first.size();
                size = rhs_size;
            }
            cost.set_size(size, size);

            typename feature_extractor::feature_vector_type feats;

            // now fill out the cost assignment matrix
            for (long r = 0; r < cost.nr(); ++r)
            {
                for (long c = 0; c < cost.nc(); ++c)
                {
                    if (r < (long)samples[idx].first.size())
                    {
                        if (c < (long)samples[idx].second.size())
                        {
                            fe.get_features(samples[idx].first[r], samples[idx].second[c], feats);
                            const double bias = current_solution(current_solution.size()-1);
                            cost(r,c) = dot(colm(current_solution,0,current_solution.size()-1), feats) + bias;

                            // add in the loss since this corresponds to an incorrect prediction.
                            if (c != labels[idx][r])
                            {
                                cost(r,c) += loss_per_false_association;
                            }
                        }
                        else
                        {
                            if (labels[idx][r] == -1)
                                cost(r,c) = 0;
                            else
                                cost(r,c) = loss_per_missed_association;
                        }

                    }
                    else
                    {
                        // padding rows carry no cost
                        cost(r,c) = 0;
                    }
                }
            }

            std::vector<long> assignment;

            if (cost.size() != 0)
            {
                // max_cost_assignment() only works with integer matrices, so convert from
                // double to integer.
                const double scale = (std::numeric_limits<dlib::int64>::max()/1000)/max(abs(cost));
                matrix<dlib::int64> int_cost = matrix_cast<dlib::int64>(round(cost*scale));
                assignment = max_cost_assignment(int_cost);
                assignment.resize(samples[idx].first.size());
            }

            loss = 0;
            // adjust assignment so that non-assignments have a value of -1. Also compute loss.
            for (unsigned long i = 0; i < assignment.size(); ++i)
            {
                if (assignment[i] >= (long)samples[idx].second.size())
                    assignment[i] = -1;

                if (assignment[i] != labels[idx][i])
                {
                    if (assignment[i] == -1)
                        loss += loss_per_missed_association;
                    else
                        loss += loss_per_false_association;
                }
            }

            get_joint_feature_vector(samples[idx], assignment, psi);
        }

        const std::vector<sample_type>& samples;
        const std::vector<label_type>& labels;
        const feature_extractor& fe;
        bool force_assignment;
        const double loss_per_false_association;
        const double loss_per_missed_association;
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_Hh_

// ==== next file in diff: ml/dlib/dlib/svm/structural_svm_assignment_problem_abstract.h ====

// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_ABSTRACT_Hh_
#ifdef DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_ABSTRACT_Hh_


#include "../matrix.h"
#include <vector>
#include "structural_svm_problem_threaded_abstract.h"
#include "assignment_function_abstract.h"

// ----------------------------------------------------------------------------------------

namespace dlib
{

    template <
        typename feature_extractor
        >
    class structural_svm_assignment_problem : noncopyable,
        public structural_svm_problem_threaded<matrix<double,0,1>,
            typename feature_extractor::feature_vector_type >
    {
        /*!
            REQUIREMENTS ON feature_extractor
                It must be an object that implements an interface compatible with
                the example_feature_extractor defined in dlib/svm/assignment_function_abstract.h.

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for learning the parameters needed to use an
                assignment_function object.  It learns the parameters by formulating the
                problem as a structural SVM problem.
        !*/

    public:
        typedef matrix<double,0,1> matrix_type;
        typedef typename feature_extractor::feature_vector_type feature_vector_type;
        typedef typename feature_extractor::lhs_element lhs_element;
        typedef typename feature_extractor::rhs_element rhs_element;
        typedef std::pair<std::vector<lhs_element>, std::vector<rhs_element> > sample_type;
        typedef std::vector<long> label_type;

        structural_svm_assignment_problem(
            const std::vector<sample_type>& samples,
            const std::vector<label_type>& labels,
            const feature_extractor& fe,
            bool force_assignment,
            unsigned long num_threads,
            const double loss_per_false_association,
            const double loss_per_missed_association
        );
        /*!
            requires
                - loss_per_false_association > 0
                - loss_per_missed_association > 0
                - is_assignment_problem(samples,labels) == true
                - if (force_assignment) then
                    - is_forced_assignment_problem(samples,labels) == true
            ensures
                - This object attempts to learn a mapping from the given samples to the
                  given labels.  In particular, it attempts to learn to predict labels[i]
                  based on samples[i].  Or in other words, this object can be used to learn
                  a parameter vector and bias, w and b, such that an assignment_function declared as:
                    assignment_function<feature_extractor> assigner(w,b,fe,force_assignment)
                  results in an assigner object which attempts to compute the following mapping:
                    labels[i] == assigner(samples[i])
                - This object will use num_threads threads during the optimization
                  procedure.  You should set this parameter equal to the number of
                  available processing cores on your machine.
                - When solving the structural SVM problem, we will use
                  loss_per_false_association as the loss for incorrectly associating
                  objects that shouldn't be associated.
                - When solving the structural SVM problem, we will use
                  loss_per_missed_association as the loss for failing to associate to
                  objects that are supposed to be associated with each other.
        !*/

    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_STRUCTURAL_SVM_ASSiGNMENT_PROBLEM_ABSTRACT_Hh_



// ==== next file in diff: ml/dlib/dlib/svm/structural_svm_distributed.h ====

// Copyright (C) 2011 Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_Hh_
#define DLIB_STRUCTURAL_SVM_DISTRIBUTeD_Hh_

#include <memory>
#include <iostream>
#include <vector>

#include "structural_svm_distributed_abstract.h"
#include "structural_svm_problem.h"
#include "../bridge.h"
#include "../misc_api.h"
#include "../statistics.h"
#include "../threads.h"
#include "../pipe.h"
#include "../type_safe_union.h"


namespace dlib
{

// ----------------------------------------------------------------------------------------

    namespace impl
    {

        // Message sent from a processing node back to the controller: the summed
        // subgradient and loss over that node's samples, plus the sample count.
        template <typename matrix_type>
        struct oracle_response
        {
            typedef typename matrix_type::type scalar_type;

            matrix_type subgradient;
            scalar_type loss;
            long num;

            friend void swap (oracle_response& a, oracle_response& b)
            {
                a.subgradient.swap(b.subgradient);
                std::swap(a.loss, b.loss);
                std::swap(a.num, b.num);
            }

            friend void serialize (const oracle_response& item, std::ostream& out)
            {
                serialize(item.subgradient, out);
                dlib::serialize(item.loss, out);
                dlib::serialize(item.num, out);
            }

            friend void deserialize (oracle_response& item, std::istream& in)
            {
                deserialize(item.subgradient, in);
                dlib::deserialize(item.loss, in);
                dlib::deserialize(item.num, in);
            }
        };

    // ----------------------------------------------------------------------------------------

        // Message sent from the controller to each processing node asking it to evaluate
        // the separation oracle at the given solution.
        template <typename matrix_type>
        struct oracle_request
        {
            typedef typename matrix_type::type scalar_type;

            matrix_type current_solution;
            scalar_type saved_current_risk_gap;
            bool skip_cache;
            bool converged;

            friend void swap (oracle_request& a, oracle_request& b)
            {
                a.current_solution.swap(b.current_solution);
                std::swap(a.saved_current_risk_gap, b.saved_current_risk_gap);
                std::swap(a.skip_cache, b.skip_cache);
                std::swap(a.converged, b.converged);
            }

            friend void serialize (const oracle_request& item, std::ostream& out)
            {
                serialize(item.current_solution, out);
                dlib::serialize(item.saved_current_risk_gap, out);
                dlib::serialize(item.skip_cache, out);
                dlib::serialize(item.converged, out);
            }

            friend void deserialize (oracle_request& item, std::istream& in)
            {
                deserialize(item.current_solution, in);
                dlib::deserialize(item.saved_current_risk_gap, in);
                dlib::deserialize(item.skip_cache, in);
                dlib::deserialize(item.converged, in);
            }
        };

    }

// ----------------------------------------------------------------------------------------

    // Worker side of the distributed structural SVM solver: listens on a port and
    // services oracle requests from a svm_struct_controller_node.
    class svm_struct_processing_node : noncopyable
    {
    public:

        template <
            typename T,
            typename U
            >
        svm_struct_processing_node (
            const structural_svm_problem<T,U>& problem,
            unsigned short port,
            unsigned short num_threads
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(port != 0 && problem.get_num_samples() != 0 &&
                problem.get_num_dimensions() != 0,
                "\t svm_struct_processing_node()"
                << "\n\t Invalid arguments were given to this function"
                << "\n\t port: " << port
                << "\n\t problem.get_num_samples(): " << problem.get_num_samples()
                << "\n\t problem.get_num_dimensions(): " << problem.get_num_dimensions()
                << "\n\t this: " << this
                );

            the_problem.reset(new node_type<T,U>(problem, port, num_threads));
        }

    private:

        // Type-erased holder so the_problem can own any node_type<T,U> instantiation.
        struct base
        {
            virtual ~base(){}
        };

        template <
            typename matrix_type,
            typename feature_vector_type
            >
        class node_type : public base, threaded_object
        {
        public:
            typedef typename matrix_type::type scalar_type;

            node_type(
                const structural_svm_problem<matrix_type,feature_vector_type>& prob,
                unsigned short port,
                unsigned long num_threads
            ) : in(3),out(3), problem(prob), tp(num_threads)
            {
                b.reconfigure(listen_on_port(port), receive(in), transmit(out));

                start();
            }

            ~node_type()
            {
                in.disable();
                out.disable();
                wait();
            }

        private:

            // Service loop: runs on this object's own thread, pulling messages from the
            // bridge pipe until the pipe is disabled.
            void thread()
            {
                using namespace impl;
                tsu_in msg;
                tsu_out temp;

                timestamper ts;
                running_stats<double> with_buffer_time;
                running_stats<double> without_buffer_time;
                unsigned long num_iterations_executed = 0;

                while (in.dequeue(msg))
                {
                    // initialize the cache and compute psi_true.
                    if (cache.size() == 0)
                    {
                        cache.resize(problem.get_num_samples());
                        for (unsigned long i = 0; i < cache.size(); ++i)
                            cache[i].init(&problem,i);

                        psi_true.set_size(problem.get_num_dimensions(),1);
                        psi_true = 0;

                        const unsigned long num = problem.get_num_samples();
                        feature_vector_type ftemp;
                        for (unsigned long i = 0; i < num; ++i)
                        {
                            cache[i].get_truth_joint_feature_vector_cached(ftemp);

                            subtract_from(psi_true, ftemp);
                        }
                    }


                    if (msg.template contains<bridge_status>() &&
                        msg.template get<bridge_status>().is_connected)
                    {
                        // on connect, report our problem dimensionality to the controller
                        temp = problem.get_num_dimensions();
                        out.enqueue(temp);

                    }
                    else if (msg.template contains<oracle_request<matrix_type> >())
                    {
                        ++num_iterations_executed;

                        const oracle_request<matrix_type>& req = msg.template get<oracle_request<matrix_type> >();

                        oracle_response<matrix_type>& data = temp.template get<oracle_response<matrix_type> >();

                        data.subgradient = psi_true;
                        data.loss = 0;

                        data.num = problem.get_num_samples();

                        const uint64 start_time = ts.get_timestamp();

                        // pick fastest buffering strategy
                        bool buffer_subgradients_locally = with_buffer_time.mean() < without_buffer_time.mean();

                        // every 50 iterations we should try to flip the buffering scheme to see if
                        // doing it the other way might be better.
                        if ((num_iterations_executed%50) == 0)
                        {
                            buffer_subgradients_locally = !buffer_subgradients_locally;
                        }

                        binder b(*this, req, data, buffer_subgradients_locally);
                        parallel_for_blocked(tp, 0, data.num, b, &binder::call_oracle);

                        const uint64 stop_time = ts.get_timestamp();
                        if (buffer_subgradients_locally)
                            with_buffer_time.add(stop_time-start_time);
                        else
                            without_buffer_time.add(stop_time-start_time);

                        out.enqueue(temp);
                    }
                }
            }

            // Callable used with parallel_for_blocked(); accumulates loss and
            // subgradient over a [begin,end) range of samples.
            struct binder
            {
                binder (
                    const node_type& self_,
                    const impl::oracle_request<matrix_type>& req_,
                    impl::oracle_response<matrix_type>& data_,
                    bool buffer_subgradients_locally_
                ) : self(self_), req(req_), data(data_),
                    buffer_subgradients_locally(buffer_subgradients_locally_) {}

                void call_oracle (
                    long begin,
                    long end
                )
                {
                    // If we are only going to call the separation oracle once then don't
                    // run the slightly more complex for loop version of this code.  Or if
                    // we just don't want to run the complex buffering one.  The code later
                    // on decides if we should do the buffering based on how long it takes
                    // to execute.  We do this because, when the subgradient is really high
                    // dimensional it can take a lot of time to add them together.  So we
                    // might want to avoid doing that.
                    if (end-begin <= 1 || !buffer_subgradients_locally)
                    {
                        scalar_type loss;
                        feature_vector_type ftemp;
                        for (long i = begin; i < end; ++i)
                        {
                            self.cache[i].separation_oracle_cached(req.converged,
                                                                   req.skip_cache,
                                                                   req.saved_current_risk_gap,
                                                                   req.current_solution,
                                                                   loss,
                                                                   ftemp);

                            // accumulate directly into the shared response under the lock
                            auto_mutex lock(self.accum_mutex);
                            data.loss += loss;
                            add_to(data.subgradient, ftemp);
                        }
                    }
                    else
                    {
                        // accumulate locally, then take the lock once for the whole range
                        scalar_type loss = 0;
                        matrix_type faccum(data.subgradient.size(),1);
                        faccum = 0;

                        feature_vector_type ftemp;

                        for (long i = begin; i < end; ++i)
                        {
                            scalar_type loss_temp;
                            self.cache[i].separation_oracle_cached(req.converged,
                                                                   req.skip_cache,
                                                                   req.saved_current_risk_gap,
                                                                   req.current_solution,
                                                                   loss_temp,
                                                                   ftemp);
                            loss += loss_temp;
                            add_to(faccum, ftemp);
                        }

                        auto_mutex lock(self.accum_mutex);
                        data.loss += loss;
                        add_to(data.subgradient, faccum);
                    }
                }

                const node_type& self;
                const impl::oracle_request<matrix_type>& req;
                impl::oracle_response<matrix_type>& data;
                bool buffer_subgradients_locally;
            };



            typedef type_safe_union<impl::oracle_request<matrix_type>, bridge_status> tsu_in;
            typedef type_safe_union<impl::oracle_response<matrix_type> , long> tsu_out;

            pipe<tsu_in> in;
            pipe<tsu_out> out;
            bridge b;

            mutable matrix_type psi_true;
            const structural_svm_problem<matrix_type,feature_vector_type>& problem;
            mutable std::vector<cache_element_structural_svm<structural_svm_problem<matrix_type,feature_vector_type> > > cache;

            mutable thread_pool tp;
            mutex accum_mutex;
        };


        std::unique_ptr<base> the_problem;
    };

// ----------------------------------------------------------------------------------------

    // Controller side of the distributed solver: aggregates oracle responses from all
    // registered processing nodes and drives the oca optimizer.
    class svm_struct_controller_node : noncopyable
    {
    public:

        svm_struct_controller_node (
        ) :
            eps(0.001),
            max_iterations(10000),
            cache_based_eps(std::numeric_limits<double>::infinity()),
            verbose(false),
            C(1)
        {}

        double get_cache_based_epsilon (
        ) const
        {
            return
cache_based_eps; + } + + void set_cache_based_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void svm_struct_controller_node::set_cache_based_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + cache_based_eps = eps_; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void svm_struct_controller_node::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + double get_epsilon ( + ) const { return eps; } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet( + ) + { + verbose = false; + } + + void add_nuclear_norm_regularizer ( + long first_dimension, + long rows, + long cols, + double regularization_strength + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 <= first_dimension && + 0 <= rows && 0 <= cols && + 0 < regularization_strength, + "\t void svm_struct_controller_node::add_nuclear_norm_regularizer()" + << "\n\t Invalid arguments were given to this function." 
+ << "\n\t first_dimension: " << first_dimension + << "\n\t rows: " << rows + << "\n\t cols: " << cols + << "\n\t regularization_strength: " << regularization_strength + << "\n\t this: " << this + ); + + impl::nuclear_norm_regularizer temp; + temp.first_dimension = first_dimension; + temp.nr = rows; + temp.nc = cols; + temp.regularization_strength = regularization_strength; + nuclear_norm_regularizers.push_back(temp); + } + + unsigned long num_nuclear_norm_regularizers ( + ) const { return nuclear_norm_regularizers.size(); } + + void clear_nuclear_norm_regularizers ( + ) { nuclear_norm_regularizers.clear(); } + + + double get_c ( + ) const { return C; } + + void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void svm_struct_controller_node::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + void add_processing_node ( + const network_address& addr + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(addr.port != 0, + "\t void svm_struct_controller_node::add_processing_node()" + << "\n\t Invalid inputs were given to this function" + << "\n\t addr.host_address: " << addr.host_address + << "\n\t addr.port: " << addr.port + << "\n\t this: " << this + ); + + // check if this address is already registered + for (unsigned long i = 0; i < nodes.size(); ++i) + { + if (nodes[i] == addr) + { + return; + } + } + + nodes.push_back(addr); + } + + void add_processing_node ( + const std::string& ip_or_hostname, + unsigned short port + ) + { + add_processing_node(network_address(ip_or_hostname,port)); + } + + unsigned long get_num_processing_nodes ( + ) const + { + return nodes.size(); + } + + void remove_processing_nodes ( + ) + { + nodes.clear(); + } + + template <typename matrix_type> + double operator() ( + const oca& solver, + matrix_type& w + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(get_num_processing_nodes() 
!= 0, + "\t double svm_struct_controller_node::operator()" + << "\n\t You must add some processing nodes before calling this function." + << "\n\t this: " << this + ); + + problem_type<matrix_type> problem(nodes); + problem.set_cache_based_epsilon(cache_based_eps); + problem.set_epsilon(eps); + problem.set_max_iterations(max_iterations); + if (verbose) + problem.be_verbose(); + problem.set_c(C); + for (unsigned long i = 0; i < nuclear_norm_regularizers.size(); ++i) + { + problem.add_nuclear_norm_regularizer( + nuclear_norm_regularizers[i].first_dimension, + nuclear_norm_regularizers[i].nr, + nuclear_norm_regularizers[i].nc, + nuclear_norm_regularizers[i].regularization_strength); + } + + return solver(problem, w); + } + + class invalid_problem : public error + { + public: + invalid_problem( + const std::string& a + ): error(a) {} + }; + + + private: + + template <typename matrix_type_> + class problem_type : public structural_svm_problem<matrix_type_> + { + public: + typedef typename matrix_type_::type scalar_type; + typedef matrix_type_ matrix_type; + + problem_type ( + const std::vector<network_address>& nodes_ + ) : + nodes(nodes_), + in(3), + num_dims(0) + { + + // initialize all the transmit pipes + out_pipes.resize(nodes.size()); + for (unsigned long i = 0; i < out_pipes.size(); ++i) + { + out_pipes[i].reset(new pipe<tsu_out>(3)); + } + + // make bridges that connect to all our remote processing nodes + bridges.resize(nodes.size()); + for (unsigned long i = 0; i< bridges.size(); ++i) + { + bridges[i].reset(new bridge(connect_to(nodes[i]), + receive(in), transmit(*out_pipes[i]))); + } + + + + // The remote processing nodes are supposed to all send the problem dimensionality + // upon connection. So get that and make sure everyone agrees on what it's supposed to be. 
+ tsu_in temp; + unsigned long responses = 0; + bool seen_dim = false; + while (responses < nodes.size()) + { + in.dequeue(temp); + if (temp.template contains<long>()) + { + ++responses; + // if this new dimension doesn't match what we have seen previously + if (seen_dim && num_dims != temp.template get<long>()) + { + throw invalid_problem("remote hosts disagree on the number of dimensions!"); + } + seen_dim = true; + num_dims = temp.template get<long>(); + } + } + } + + // These functions are just here because the structural_svm_problem requires + // them, but since we are overloading get_risk() they are never called so they + // don't matter. + virtual long get_num_samples () const {return 0;} + virtual void get_truth_joint_feature_vector ( long , matrix_type& ) const {} + virtual void separation_oracle ( const long , const matrix_type& , scalar_type& , matrix_type& ) const {} + + virtual long get_num_dimensions ( + ) const + { + return num_dims; + } + + virtual void get_risk ( + matrix_type& w, + scalar_type& risk, + matrix_type& subgradient + ) const + { + using namespace impl; + subgradient.set_size(w.size(),1); + subgradient = 0; + + // send out all the oracle requests + tsu_out temp_out; + for (unsigned long i = 0; i < out_pipes.size(); ++i) + { + temp_out.template get<oracle_request<matrix_type> >().current_solution = w; + temp_out.template get<oracle_request<matrix_type> >().saved_current_risk_gap = this->saved_current_risk_gap; + temp_out.template get<oracle_request<matrix_type> >().skip_cache = this->skip_cache; + temp_out.template get<oracle_request<matrix_type> >().converged = this->converged; + out_pipes[i]->enqueue(temp_out); + } + + // collect all the oracle responses + long num = 0; + scalar_type total_loss = 0; + tsu_in temp_in; + unsigned long responses = 0; + while (responses < out_pipes.size()) + { + in.dequeue(temp_in); + if (temp_in.template contains<oracle_response<matrix_type> >()) + { + ++responses; + const oracle_response<matrix_type>& 
data = temp_in.template get<oracle_response<matrix_type> >(); + subgradient += data.subgradient; + total_loss += data.loss; + num += data.num; + } + } + + subgradient /= num; + total_loss /= num; + risk = total_loss + dot(subgradient,w); + + if (this->nuclear_norm_regularizers.size() != 0) + { + matrix_type grad; + double obj; + this->compute_nuclear_norm_parts(w, grad, obj); + risk += obj; + subgradient += grad; + } + } + + std::vector<network_address> nodes; + + typedef type_safe_union<impl::oracle_request<matrix_type> > tsu_out; + typedef type_safe_union<impl::oracle_response<matrix_type>, long> tsu_in; + + std::vector<std::shared_ptr<pipe<tsu_out> > > out_pipes; + mutable pipe<tsu_in> in; + std::vector<std::shared_ptr<bridge> > bridges; + long num_dims; + }; + + std::vector<network_address> nodes; + double eps; + unsigned long max_iterations; + double cache_based_eps; + bool verbose; + double C; + std::vector<impl::nuclear_norm_regularizer> nuclear_norm_regularizers; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_svm_distributed_abstract.h b/ml/dlib/dlib/svm/structural_svm_distributed_abstract.h new file mode 100644 index 000000000..175a643c8 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_distributed_abstract.h @@ -0,0 +1,357 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_ + + +#include "structural_svm_problem_abstract.h" +#include "../optimization/optimization_oca_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class svm_struct_processing_node : noncopyable + { + /*! 
+ WHAT THIS OBJECT REPRESENTS + This object is a tool for distributing the work involved in solving + a dlib::structural_svm_problem across many computers. It is used in + conjunction with the svm_struct_controller_node defined below. + !*/ + + public: + + template < + typename T, + typename U + > + svm_struct_processing_node ( + const structural_svm_problem<T,U>& problem, + unsigned short port, + unsigned short num_threads + ); + /*! + requires + - port != 0 + - problem.get_num_samples() != 0 + - problem.get_num_dimensions() != 0 + ensures + - This object will listen on the given port for a TCP connection from a + svm_struct_controller_node. Once connected, the controller node will + be able to access the given problem. + - Will use num_threads threads at a time to make concurrent calls to the + problem.separation_oracle() routine. You should set this parameter equal + to the number of available processing cores. + - Note that the following parameters within the given problem are ignored: + - problem.get_c() + - problem.get_epsilon() + - problem.get_cache_based_epsilon() + - problem.num_nuclear_norm_regularizers() + - whether the problem is verbose or not + Instead, they are defined by the svm_struct_controller_node. Note, however, + that the problem.get_max_cache_size() parameter is meaningful and controls + the size of the separation oracle cache within a svm_struct_processing_node. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + + class svm_struct_controller_node : noncopyable + { + /*! + INITIAL VALUE + - get_num_processing_nodes() == 0 + - get_epsilon() == 0.001 + - get_max_iterations() == 10000 + - get_c() == 1 + - This object will not be verbose + + WHAT THIS OBJECT REPRESENTS + This object is a tool for distributing the work involved in solving a + dlib::structural_svm_problem across many computers. 
The best way to understand + its use is via example: + + First, suppose you have defined a structural_svm_problem object by inheriting from + it and defining the appropriate virtual functions. You could solve it by passing + an instance to the oca optimizer. However, if your separation oracle takes a long + time to evaluate then the optimization will take a long time to solve. To speed + this up we can distribute the calls to the separation oracle across many computers. + + To make this concrete, lets imagine you want to distribute the work across three + computers. You can accomplish this by creating four programs. One containing a + svm_struct_controller_node and three containing svm_struct_processing_nodes. + + The programs might look like this: + + Controller program: + int main() + { + svm_struct_controller_node cont; + cont.set_c(100); + // Tell cont where the processing nodes are on your network. + cont.add_processing_node("192.168.1.10:12345"); + cont.add_processing_node("192.168.1.11:12345"); + cont.add_processing_node("192.168.1.12:12345"); + matrix<double> w; + oca solver; + cont(solver, w); // Run the optimization. + // After this finishes w will contain the solution vector. + } + + Processing programs (they are all the same, except that each loads a different subset + of the training data): + int main() + { + // Put one third of your data into this problem object. How you do this depends on your problem. + your_structural_svm_problem problem; + svm_struct_processing_node node(problem, 12345, number_of_cores_on_this_computer); + cout << "hit enter to terminate this program" << endl; + cin.get(); + } + + !*/ + + public: + + svm_struct_controller_node ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. 
+ Smaller values may result in a more accurate solution but take longer + to execute. Specifically, the algorithm stops when the average sample + risk (i.e. R(w) as defined by the dlib::structural_svm_problem object) is + within epsilon of its optimal value. + + Also note that sample risk is an upper bound on a sample's loss. So + you can think of this epsilon value as saying "solve the optimization + problem until the average loss per sample is within epsilon of its + optimal value". + !*/ + + double get_cache_based_epsilon ( + ) const; + /*! + ensures + - if (get_max_cache_size() != 0) then + - The solver will not stop when the average sample risk is within + get_epsilon() of its optimal value. Instead, it will keep running + but will run the optimizer completely on the cache until the average + sample risk is within #get_cache_based_epsilon() of its optimal + value. This means that it will perform this additional refinement in + the solution accuracy without making any additional calls to the + separation_oracle(). This is useful when using a nuclear norm + regularization term because it allows you to quickly solve the + optimization problem to a high precision, which in the case of a + nuclear norm regularized problem means that many of the learned + matrices will be low rank or very close to low rank due to the + nuclear norm regularizer. This may not happen without solving the + problem to a high accuracy or their ranks may be difficult to + determine, so the extra accuracy given by the cache based refinement + is very useful. Finally, note that we include the nuclear norm term + as part of the "risk" for the purposes of determining when to stop. + - else + - The value of #get_cache_based_epsilon() has no effect. + !*/ + + void set_cache_based_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_cache_based_epsilon() == eps + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! 
+ ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void add_nuclear_norm_regularizer ( + long first_dimension, + long rows, + long cols, + double regularization_strength + ); + /*! + requires + - 0 <= first_dimension < number of dimensions in problem + - 0 <= rows + - 0 <= cols + - first_dimension+rows*cols <= number of dimensions in problem + - 0 < regularization_strength + ensures + - Adds a nuclear norm regularization term to the optimization problem + solved by this object. That is, instead of solving: + Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + this object will solve: + Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w) + where "part of w" is the part of w indicated by the arguments to this + function. In particular, the part of w included in the nuclear norm is + exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols). + Therefore, if you think of the w vector as being the concatenation of a + bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() + to add nuclear norm regularization terms to any of the matrices packed into w. + - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1 + !*/ + + unsigned long num_nuclear_norm_regularizers ( + ) const; + /*! + ensures + - returns the number of nuclear norm regularizers that are currently a part + of this optimization problem. That is, returns the number of times + add_nuclear_norm_regularizer() has been called since the last call to + clear_nuclear_norm_regularizers() or object construction, whichever is + most recent. + !*/ + + void clear_nuclear_norm_regularizers ( + ); + /*! + ensures + - #num_nuclear_norm_regularizers() == 0 + !*/ + + void be_verbose ( + ); + /*! 
+ ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + double get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data + exactly or allowing more errors but hopefully improving the + generalization of the resulting classifier. Larger values encourage + exact fitting while smaller values of C may encourage better + generalization. + !*/ + + void set_c ( + double C + ); + /*! + requires + - C > 0 + ensures + - #get_c() == C + !*/ + + void add_processing_node ( + const network_address& addr + ); + /*! + requires + - addr.port != 0 + ensures + - if (this address hasn't already been added) then + - #get_num_processing_nodes() == get_num_processing_nodes() + 1 + - When operator() is invoked to solve the structural svm problem this + object will connect to the svm_struct_processing_node located at the + given network address and will include it in the distributed + optimization. + !*/ + + void add_processing_node ( + const std::string& ip_or_hostname, + unsigned short port + ); + /*! + requires + - port != 0 + ensures + - invokes: add_processing_node(network_address(ip_or_hostname, port)) + !*/ + + unsigned long get_num_processing_nodes ( + ) const; + /*! + ensures + - returns the number of remote processing nodes that have been + registered with this object. + !*/ + + void remove_processing_nodes ( + ); + /*! + ensures + - #get_num_processing_nodes() == 0 + !*/ + + class invalid_problem : public error {}; + + template <typename matrix_type> + double operator() ( + const oca& solver, + matrix_type& w + ) const; + /*! 
+ requires + - get_num_processing_nodes() != 0 + - matrix_type == a dlib::matrix capable of storing column vectors + ensures + - connects to the processing nodes and begins optimizing the structural + svm problem using the given oca solver. + - stores the solution in #w + - returns the objective value at the solution #w + throws + - invalid_problem + This exception is thrown if the svm_struct_processing_nodes disagree + on the dimensionality of the problem. That is, if they disagree on + the value of structural_svm_problem::get_num_dimensions(). + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_DISTRIBUTeD_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem.h b/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem.h new file mode 100644 index 000000000..c677861c9 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem.h @@ -0,0 +1,542 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_Hh_ +#define DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_Hh_ + + +#include "structural_svm_graph_labeling_problem_abstract.h" +#include "../graph_cuts.h" +#include "../matrix.h" +#include "../array.h" +#include <vector> +#include <iterator> +#include "structural_svm_problem_threaded.h" +#include "../graph.h" +#include "sparse_vector.h" +#include <sstream> + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename graph_type + > + bool is_graph_labeling_problem ( + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + std::string& reason_for_failure + ) + { + typedef typename graph_type::type node_vector_type; + typedef typename graph_type::edge_type edge_vector_type; + // The graph must use all dense vectors or all sparse vectors. It can't mix the two types together. 
+ COMPILE_TIME_ASSERT( (is_matrix<node_vector_type>::value && is_matrix<edge_vector_type>::value) || + (!is_matrix<node_vector_type>::value && !is_matrix<edge_vector_type>::value)); + + + std::ostringstream sout; + reason_for_failure.clear(); + + if (!is_learning_problem(samples, labels)) + { + reason_for_failure = "is_learning_problem(samples, labels) returned false."; + return false; + } + + const bool ismat = is_matrix<typename graph_type::type>::value; + + // these are -1 until assigned with a value + long node_dims = -1; + long edge_dims = -1; + + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (samples[i].number_of_nodes() != labels[i].size()) + { + sout << "samples["<<i<<"].number_of_nodes() doesn't match labels["<<i<<"].size()."; + reason_for_failure = sout.str(); + return false; + } + if (graph_contains_length_one_cycle(samples[i])) + { + sout << "graph_contains_length_one_cycle(samples["<<i<<"]) returned true."; + reason_for_failure = sout.str(); + return false; + } + + for (unsigned long j = 0; j < samples[i].number_of_nodes(); ++j) + { + if (ismat && samples[i].node(j).data.size() == 0) + { + sout << "A graph contains an empty vector at node: samples["<<i<<"].node("<<j<<").data."; + reason_for_failure = sout.str(); + return false; + } + + if (ismat && node_dims == -1) + node_dims = samples[i].node(j).data.size(); + // all nodes must have vectors of the same size. 
+ if (ismat && (long)samples[i].node(j).data.size() != node_dims) + { + sout << "Not all node vectors in samples["<<i<<"] are the same dimension."; + reason_for_failure = sout.str(); + return false; + } + + for (unsigned long n = 0; n < samples[i].node(j).number_of_neighbors(); ++n) + { + if (ismat && samples[i].node(j).edge(n).size() == 0) + { + sout << "A graph contains an empty vector at edge: samples["<<i<<"].node("<<j<<").edge("<<n<<")."; + reason_for_failure = sout.str(); + return false; + } + if (min(samples[i].node(j).edge(n)) < 0) + { + sout << "A graph contains negative values on an edge vector at: samples["<<i<<"].node("<<j<<").edge("<<n<<")."; + reason_for_failure = sout.str(); + return false; + } + + if (ismat && edge_dims == -1) + edge_dims = samples[i].node(j).edge(n).size(); + // all edges must have vectors of the same size. + if (ismat && (long)samples[i].node(j).edge(n).size() != edge_dims) + { + sout << "Not all edge vectors in samples["<<i<<"] are the same dimension."; + reason_for_failure = sout.str(); + return false; + } + } + } + } + + return true; + } + + template < + typename graph_type + > + bool is_graph_labeling_problem ( + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels + ) + { + std::string reason_for_failure; + return is_graph_labeling_problem(samples, labels, reason_for_failure); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + bool sizes_match ( + const std::vector<std::vector<T> >& lhs, + const std::vector<std::vector<U> >& rhs + ) + { + if (lhs.size() != rhs.size()) + return false; + + for (unsigned long i = 0; i < lhs.size(); ++i) + { + if (lhs[i].size() != rhs[i].size()) + return false; + } + + return true; + } + +// ---------------------------------------------------------------------------------------- + + inline bool all_values_are_nonnegative ( + const std::vector<std::vector<double> >& x 
+ ) + { + for (unsigned long i = 0; i < x.size(); ++i) + { + for (unsigned long j = 0; j < x[i].size(); ++j) + { + if (x[i][j] < 0) + return false; + } + } + return true; + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename T, + typename enable = void + > + struct fvect + { + // In this case type should be some sparse vector type + typedef typename T::type type; + }; + + template < typename T > + struct fvect<T, typename enable_if<is_matrix<typename T::type> >::type> + { + // The point of this stuff is to create the proper matrix + // type to represent the concatenation of an edge vector + // with an node vector. + typedef typename T::type node_mat; + typedef typename T::edge_type edge_mat; + const static long NRd = node_mat::NR; + const static long NRe = edge_mat::NR; + const static long NR = ((NRd!=0) && (NRe!=0)) ? 
(NRd+NRe) : 0; + typedef typename node_mat::value_type value_type; + + typedef matrix<value_type,NR,1, typename node_mat::mem_manager_type, typename node_mat::layout_type> type; + }; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename graph_type + > + class structural_svm_graph_labeling_problem : noncopyable, + public structural_svm_problem_threaded<matrix<double,0,1>, + typename dlib::impl::fvect<graph_type>::type > + { + public: + typedef matrix<double,0,1> matrix_type; + typedef typename dlib::impl::fvect<graph_type>::type feature_vector_type; + + typedef graph_type sample_type; + + typedef std::vector<bool> label_type; + + structural_svm_graph_labeling_problem( + const dlib::array<sample_type>& samples_, + const std::vector<label_type>& labels_, + const std::vector<std::vector<double> >& losses_, + unsigned long num_threads = 2 + ) : + structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads), + samples(samples_), + labels(labels_), + losses(losses_) + { + // make sure requires clause is not broken +#ifdef ENABLE_ASSERTS + std::string reason_for_failure; + DLIB_ASSERT(is_graph_labeling_problem(samples, labels, reason_for_failure) == true , + "\t structural_svm_graph_labeling_problem::structural_svm_graph_labeling_problem()" + << "\n\t Invalid inputs were given to this function." + << "\n\t reason_for_failure: " << reason_for_failure + << "\n\t samples.size(): " << samples.size() + << "\n\t labels.size(): " << labels.size() + << "\n\t this: " << this ); + DLIB_ASSERT((losses.size() == 0 || sizes_match(labels, losses) == true) && + all_values_are_nonnegative(losses) == true, + "\t structural_svm_graph_labeling_problem::structural_svm_graph_labeling_problem()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t labels.size(): " << labels.size() + << "\n\t losses.size(): " << losses.size() + << "\n\t sizes_match(labels,losses): " << sizes_match(labels,losses) + << "\n\t all_values_are_nonnegative(losses): " << all_values_are_nonnegative(losses) + << "\n\t this: " << this ); +#endif + + loss_pos = 1.0; + loss_neg = 1.0; + + // figure out how many dimensions are in the node and edge vectors. + node_dims = 0; + edge_dims = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + { + for (unsigned long j = 0; j < samples[i].number_of_nodes(); ++j) + { + node_dims = std::max(node_dims,(long)max_index_plus_one(samples[i].node(j).data)); + for (unsigned long n = 0; n < samples[i].node(j).number_of_neighbors(); ++n) + { + edge_dims = std::max(edge_dims, (long)max_index_plus_one(samples[i].node(j).edge(n))); + } + } + } + } + + const std::vector<std::vector<double> >& get_losses ( + ) const { return losses; } + + long get_num_edge_weights ( + ) const + { + return edge_dims; + } + + void set_loss_on_positive_class ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss >= 0 && get_losses().size() == 0, + "\t void structural_svm_graph_labeling_problem::set_loss_on_positive_class()" + << "\n\t Invalid inputs were given to this function." + << "\n\t loss: " << loss + << "\n\t this: " << this ); + + loss_pos = loss; + } + + void set_loss_on_negative_class ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss >= 0 && get_losses().size() == 0, + "\t void structural_svm_graph_labeling_problem::set_loss_on_negative_class()" + << "\n\t Invalid inputs were given to this function." 
+ << "\n\t loss: " << loss + << "\n\t this: " << this ); + + loss_neg = loss; + } + + double get_loss_on_negative_class ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(get_losses().size() == 0, + "\t double structural_svm_graph_labeling_problem::get_loss_on_negative_class()" + << "\n\t Invalid inputs were given to this function." + << "\n\t this: " << this ); + + return loss_neg; + } + + double get_loss_on_positive_class ( + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(get_losses().size() == 0, + "\t double structural_svm_graph_labeling_problem::get_loss_on_positive_class()" + << "\n\t Invalid inputs were given to this function." + << "\n\t this: " << this ); + + return loss_pos; + } + + + private: + virtual long get_num_dimensions ( + ) const + { + // The psi/w vector will begin with all the edge dims and then follow with the node dims. + return edge_dims + node_dims; + } + + virtual long get_num_samples ( + ) const + { + return samples.size(); + } + + template <typename psi_type> + typename enable_if<is_matrix<psi_type> >::type get_joint_feature_vector ( + const sample_type& sample, + const label_type& label, + psi_type& psi + ) const + { + psi.set_size(get_num_dimensions()); + psi = 0; + for (unsigned long i = 0; i < sample.number_of_nodes(); ++i) + { + // accumulate the node vectors + if (label[i] == true) + set_rowm(psi, range(edge_dims, psi.size()-1)) += sample.node(i).data; + + for (unsigned long n = 0; n < sample.node(i).number_of_neighbors(); ++n) + { + const unsigned long j = sample.node(i).neighbor(n).index(); + + // Don't double count edges. Also only include the vector if + // the labels disagree. 
+ if (i < j && label[i] != label[j]) + { + set_rowm(psi, range(0, edge_dims-1)) -= sample.node(i).edge(n); + } + } + } + } + + template <typename T> + void add_to_sparse_vect ( + T& psi, + const T& vect, + unsigned long offset + ) const + { + for (typename T::const_iterator i = vect.begin(); i != vect.end(); ++i) + { + psi.insert(psi.end(), std::make_pair(i->first+offset, i->second)); + } + } + + template <typename T> + void subtract_from_sparse_vect ( + T& psi, + const T& vect + ) const + { + for (typename T::const_iterator i = vect.begin(); i != vect.end(); ++i) + { + psi.insert(psi.end(), std::make_pair(i->first, -i->second)); + } + } + + template <typename psi_type> + typename disable_if<is_matrix<psi_type> >::type get_joint_feature_vector ( + const sample_type& sample, + const label_type& label, + psi_type& psi + ) const + { + psi.clear(); + for (unsigned long i = 0; i < sample.number_of_nodes(); ++i) + { + // accumulate the node vectors + if (label[i] == true) + add_to_sparse_vect(psi, sample.node(i).data, edge_dims); + + for (unsigned long n = 0; n < sample.node(i).number_of_neighbors(); ++n) + { + const unsigned long j = sample.node(i).neighbor(n).index(); + + // Don't double count edges. Also only include the vector if + // the labels disagree. + if (i < j && label[i] != label[j]) + { + subtract_from_sparse_vect(psi, sample.node(i).edge(n)); + } + } + } + } + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const + { + get_joint_feature_vector(samples[idx], labels[idx], psi); + } + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + double& loss, + feature_vector_type& psi + ) const + { + const sample_type& samp = samples[idx]; + + // setup the potts graph based on samples[idx] and current_solution. 
+ graph<double,double>::kernel_1a g; + copy_graph_structure(samp, g); + for (unsigned long i = 0; i < g.number_of_nodes(); ++i) + { + g.node(i).data = dot(rowm(current_solution,range(edge_dims,current_solution.size()-1)), + samp.node(i).data); + + // Include a loss augmentation so that we will get the proper loss augmented + // max when we use find_max_factor_graph_potts() below. + if (labels[idx][i]) + g.node(i).data -= get_loss_for_sample(idx,i,!labels[idx][i]); + else + g.node(i).data += get_loss_for_sample(idx,i,!labels[idx][i]); + + for (unsigned long n = 0; n < g.node(i).number_of_neighbors(); ++n) + { + const unsigned long j = g.node(i).neighbor(n).index(); + // Don't compute an edge weight more than once. + if (i < j) + { + g.node(i).edge(n) = dot(rowm(current_solution,range(0,edge_dims-1)), + samp.node(i).edge(n)); + } + } + + } + + std::vector<node_label> labeling; + find_max_factor_graph_potts(g, labeling); + + + std::vector<bool> bool_labeling; + bool_labeling.reserve(labeling.size()); + // figure out the loss + loss = 0; + for (unsigned long i = 0; i < labeling.size(); ++i) + { + const bool predicted_label = (labeling[i]!= 0); + bool_labeling.push_back(predicted_label); + loss += get_loss_for_sample(idx, i, predicted_label); + } + + // compute psi + get_joint_feature_vector(samp, bool_labeling, psi); + } + + double get_loss_for_sample ( + long sample_idx, + long node_idx, + bool predicted_label + ) const + /*! + requires + - 0 <= sample_idx < labels.size() + - 0 <= node_idx < labels[sample_idx].size() + ensures + - returns the loss incurred for predicting that the node + samples[sample_idx].node(node_idx) has a label of predicted_label. + !*/ + { + const bool true_label = labels[sample_idx][node_idx]; + if (true_label != predicted_label) + { + if (losses.size() != 0) + return losses[sample_idx][node_idx]; + else if (true_label == true) + return loss_pos; + else + return loss_neg; + } + else + { + // no loss for making the correct prediction. 
+ return 0; + } + } + + const dlib::array<sample_type>& samples; + const std::vector<label_type>& labels; + const std::vector<std::vector<double> >& losses; + + long node_dims; + long edge_dims; + double loss_pos; + double loss_neg; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem_abstract.h new file mode 100644 index 000000000..ab99ed8f4 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_graph_labeling_problem_abstract.h @@ -0,0 +1,249 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_ABSTRACT_Hh_ + +#include "../array/array_kernel_abstract.h" +#include "../graph/graph_kernel_abstract.h" +#include "../matrix/matrix_abstract.h" +#include "sparse_vector_abstract.h" +#include "structural_svm_problem_threaded_abstract.h" +#include <vector> + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename graph_type + > + bool is_graph_labeling_problem ( + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels + ); + /*! + requires + - graph_type is an implementation of dlib/graph/graph_kernel_abstract.h + - graph_type::type and graph_type::edge_type are either both dlib::matrix types + capable of containing column vectors or both some kind of sparse vector type + as defined in dlib/svm/sparse_vector_abstract.h. 
+ ensures + - Note that a graph labeling problem is a task to learn a binary classifier which + predicts the correct label for each node in the provided graphs. Additionally, + we have information in the form of edges between nodes where edges are present + when we believe the linked nodes are likely to have the same label. Therefore, + part of a graph labeling problem is to learn to score each edge in terms of how + strongly the edge should enforce labeling consistency between its two nodes. + Thus, to be a valid graph labeling problem, samples should contain example graphs + of connected nodes while labels should indicate the desired label of each node. + The precise requirements for a valid graph labeling problem are listed below. + - This function returns true if all of the following are true and false otherwise: + - is_learning_problem(samples, labels) == true + - All the vectors stored on the edges of each graph in samples + contain only values which are >= 0. + - for all valid i: + - graph_contains_length_one_cycle(samples[i]) == false + - samples[i].number_of_nodes() == labels[i].size() + (i.e. Every graph node gets its own label) + - if (graph_type::edge_type is a dlib::matrix) then + - All the nodes must contain vectors with the same number of dimensions. + - All the edges must contain vectors with the same number of dimensions. + (However, edge vectors may differ in dimension from node vectors.) + - All vectors have non-zero size. That is, they have more than 0 dimensions. + !*/ + + template < + typename graph_type + > + bool is_graph_labeling_problem ( + const dlib::array<graph_type>& samples, + const std::vector<std::vector<bool> >& labels, + std::string& reason_for_failure + ); + /*! + This function is identical to the above version of is_graph_labeling_problem() + except that if it returns false it will populate reason_for_failure with a message + describing why the graph is not a valid learning problem. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + bool sizes_match ( + const std::vector<std::vector<T> >& lhs, + const std::vector<std::vector<U> >& rhs + ); + /*! + ensures + - returns true if the sizes of lhs and rhs, as well as their constituent vectors + all match. In particular, we return true if all of the following conditions are + met and false otherwise: + - lhs.size() == rhs.size() + - for all valid i: + - lhs[i].size() == rhs[i].size() + !*/ + +// ---------------------------------------------------------------------------------------- + + bool all_values_are_nonnegative ( + const std::vector<std::vector<double> >& x + ); + /*! + ensures + - returns true if all the double values contained in x are >= 0. + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename graph_type + > + class structural_svm_graph_labeling_problem : noncopyable, + public structural_svm_problem_threaded<matrix<double,0,1>, + typename graph_type::type > + { + /*! + REQUIREMENTS ON graph_type + - graph_type is an implementation of dlib/graph/graph_kernel_abstract.h + - graph_type::type and graph_type::edge_type must be either matrix objects + capable of representing column vectors or some kind of sparse vector + type as defined in dlib/svm/sparse_vector_abstract.h. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning the weight vectors needed to use + a graph_labeler object. It learns the parameter vectors by formulating + the problem as a structural SVM problem. 
+ !*/ + + public: + typedef matrix<double,0,1> matrix_type; + typedef typename graph_type::type feature_vector_type; + typedef graph_type sample_type; + typedef std::vector<bool> label_type; + + structural_svm_graph_labeling_problem( + const dlib::array<sample_type>& samples, + const std::vector<label_type>& labels, + const std::vector<std::vector<double> >& losses, + unsigned long num_threads + ); + /*! + requires + - is_graph_labeling_problem(samples,labels) == true + - if (losses.size() != 0) then + - sizes_match(labels, losses) == true + - all_values_are_nonnegative(losses) == true + ensures + - This object attempts to learn a mapping from the given samples to the + given labels. In particular, it attempts to learn to predict labels[i] + based on samples[i]. Or in other words, this object can be used to learn + parameter vectors, E and W, such that a graph_labeler declared as: + graph_labeler<feature_vector_type> labeler(E,W) + results in a labeler object which attempts to compute the following mapping: + labels[i] == labeler(samples[i]) + - When you use this object with the oca optimizer you get back just one + big parameter vector as the solution. Therefore, note that this single + big vector is the concatenation of E and W. The first get_num_edge_weights() + elements of this vector correspond to E and the rest is W. + - This object will use num_threads threads during the optimization + procedure. You should set this parameter equal to the number of + available processing cores on your machine. + - if (losses.size() == 0) then + - #get_loss_on_positive_class() == 1.0 + - #get_loss_on_negative_class() == 1.0 + - #get_losses().size() == 0 + - The losses argument is effectively ignored if its size is zero. + - else + - #get_losses() == losses + - Each node in the training data has its own loss value defined by + the corresponding entry of losses. 
In particular, this means that + the node with label labels[i][j] incurs a loss of losses[i][j] if + it is incorrectly labeled. + - The get_loss_on_positive_class() and get_loss_on_negative_class() + parameters are ignored. Only get_losses() is used in this case. + !*/ + + const std::vector<std::vector<double> >& get_losses ( + ) const; + /*! + ensures + - returns the losses vector given to this object's constructor. + This vector defines the per sample loss values used. If the vector + is empty then the loss values defined by get_loss_on_positive_class() and + get_loss_on_positive_class() are used instead. + !*/ + + long get_num_edge_weights ( + ) const; + /*! + ensures + - returns the dimensionality of the edge weight vector. It is also + important to know that when using the oca solver with this object, + you must set it to generate non-negative weights for the edge weight + part of the total weight vector. You can do this by passing get_num_edge_weights() + to the third argument to oca::operator(). + !*/ + + void set_loss_on_positive_class ( + double loss + ); + /*! + requires + - loss >= 0 + - get_losses().size() == 0 + ensures + - #get_loss_on_positive_class() == loss + !*/ + + void set_loss_on_negative_class ( + double loss + ); + /*! + requires + - loss >= 0 + - get_losses().size() == 0 + ensures + - #get_loss_on_negative_class() == loss + !*/ + + double get_loss_on_positive_class ( + ) const; + /*! + requires + - get_losses().size() == 0 + ensures + - returns the loss incurred when a graph node which is supposed to have + a label of true gets misclassified. This value controls how much we care + about correctly classifying nodes which should be labeled as true. Larger + loss values indicate that we care more strongly than smaller values. + !*/ + + double get_loss_on_negative_class ( + ) const; + /*! 
+ requires + - get_losses().size() == 0 + ensures + - returns the loss incurred when a graph node which is supposed to have + a label of false gets misclassified. This value controls how much we care + about correctly classifying nodes which should be labeled as false. Larger + loss values indicate that we care more strongly than smaller values. + !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_GRAPH_LAbELING_PROBLEM_ABSTRACT_Hh_ + + + + diff --git a/ml/dlib/dlib/svm/structural_svm_object_detection_problem.h b/ml/dlib/dlib/svm/structural_svm_object_detection_problem.h new file mode 100644 index 000000000..1c54a42b1 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_object_detection_problem.h @@ -0,0 +1,531 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_Hh_ +#define DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_Hh_ + +#include "structural_svm_object_detection_problem_abstract.h" +#include "../matrix.h" +#include "structural_svm_problem_threaded.h" +#include <sstream> +#include "../string.h" +#include "../array.h" +#include "../image_processing/full_object_detection.h" +#include "../image_processing/box_overlap_testing.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_scanner_type, + typename image_array_type + > + class structural_svm_object_detection_problem : public structural_svm_problem_threaded<matrix<double,0,1> >, + noncopyable + { + public: + + structural_svm_object_detection_problem( + const image_scanner_type& scanner, + const test_box_overlap& overlap_tester, + const bool auto_overlap_tester, + const image_array_type& images_, + const std::vector<std::vector<full_object_detection> >& truth_object_detections_, + const 
std::vector<std::vector<rectangle> >& ignore_, + const test_box_overlap& ignore_overlap_tester_, + unsigned long num_threads = 2 + ) : + structural_svm_problem_threaded<matrix<double,0,1> >(num_threads), + boxes_overlap(overlap_tester), + images(images_), + truth_object_detections(truth_object_detections_), + ignore(ignore_), + ignore_overlap_tester(ignore_overlap_tester_), + match_eps(0.5), + loss_per_false_alarm(1), + loss_per_missed_target(1) + { +#ifdef ENABLE_ASSERTS + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(images_, truth_object_detections_) && + ignore_.size() == images_.size() && + scanner.get_num_detection_templates() > 0, + "\t structural_svm_object_detection_problem::structural_svm_object_detection_problem()" + << "\n\t Invalid inputs were given to this function " + << "\n\t scanner.get_num_detection_templates(): " << scanner.get_num_detection_templates() + << "\n\t is_learning_problem(images_,truth_object_detections_): " << is_learning_problem(images_,truth_object_detections_) + << "\n\t ignore.size(): " << ignore.size() + << "\n\t images.size(): " << images.size() + << "\n\t this: " << this + ); + for (unsigned long i = 0; i < truth_object_detections.size(); ++i) + { + for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) + { + DLIB_ASSERT(truth_object_detections[i][j].num_parts() == scanner.get_num_movable_components_per_detection_template(), + "\t trained_function_type structural_object_detection_trainer::train()" + << "\n\t invalid inputs were given to this function" + << "\n\t truth_object_detections["<<i<<"]["<<j<<"].num_parts(): " << + truth_object_detections[i][j].num_parts() + << "\n\t scanner.get_num_movable_components_per_detection_template(): " << + scanner.get_num_movable_components_per_detection_template() + << "\n\t all_parts_in_rect(truth_object_detections["<<i<<"]["<<j<<"]): " << all_parts_in_rect(truth_object_detections[i][j]) + ); + } + } +#endif + // The purpose of the max_num_dets 
member variable is to give us a reasonable + // upper limit on the number of detections we can expect from a single image. + // This is used in the separation_oracle to put a hard limit on the number of + // detections we will consider. We do this purely for computational reasons + // since otherwise we can end up wasting large amounts of time on certain + // pathological cases during optimization which ultimately do not influence the + // result. Therefore, we force the separation oracle to only consider the + // max_num_dets strongest detections. + max_num_dets = 0; + for (unsigned long i = 0; i < truth_object_detections.size(); ++i) + { + if (truth_object_detections[i].size() > max_num_dets) + max_num_dets = truth_object_detections[i].size(); + } + max_num_dets = max_num_dets*3 + 10; + + initialize_scanners(scanner, num_threads); + + if (auto_overlap_tester) + { + auto_configure_overlap_tester(); + } + } + + test_box_overlap get_overlap_tester ( + ) const + { + return boxes_overlap; + } + + void set_match_eps ( + double eps + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < eps && eps < 1, + "\t void structural_svm_object_detection_problem::set_match_eps(eps)" + << "\n\t Invalid inputs were given to this function " + << "\n\t eps: " << eps + << "\n\t this: " << this + ); + + match_eps = eps; + } + + double get_match_eps ( + ) const + { + return match_eps; + } + + double get_loss_per_missed_target ( + ) const + { + return loss_per_missed_target; + } + + void set_loss_per_missed_target ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_svm_object_detection_problem::set_loss_per_missed_target(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_missed_target = loss; + } + + double get_loss_per_false_alarm ( + ) const + { + return loss_per_false_alarm; + } + + void set_loss_per_false_alarm ( + double 
loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_svm_object_detection_problem::set_loss_per_false_alarm(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_false_alarm = loss; + } + + private: + + void auto_configure_overlap_tester( + ) + { + std::vector<std::vector<rectangle> > mapped_rects(truth_object_detections.size()); + for (unsigned long i = 0; i < truth_object_detections.size(); ++i) + { + mapped_rects[i].resize(truth_object_detections[i].size()); + for (unsigned long j = 0; j < truth_object_detections[i].size(); ++j) + { + mapped_rects[i][j] = scanners[i].get_best_matching_rect(truth_object_detections[i][j].get_rect()); + } + } + + boxes_overlap = find_tight_overlap_tester(mapped_rects); + } + + + virtual long get_num_dimensions ( + ) const + { + return scanners[0].get_num_dimensions() + + 1;// for threshold + } + + virtual long get_num_samples ( + ) const + { + return images.size(); + } + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const + { + const image_scanner_type& scanner = scanners[idx]; + + psi.set_size(get_num_dimensions()); + std::vector<rectangle> mapped_rects; + + psi = 0; + for (unsigned long i = 0; i < truth_object_detections[idx].size(); ++i) + { + mapped_rects.push_back(scanner.get_best_matching_rect(truth_object_detections[idx][i].get_rect())); + scanner.get_feature_vector(truth_object_detections[idx][i], psi); + } + psi(scanner.get_num_dimensions()) = -1.0*truth_object_detections[idx].size(); + + // check if any of the boxes overlap. 
If they do then it is impossible for + // us to learn to correctly classify this sample + for (unsigned long i = 0; i < mapped_rects.size(); ++i) + { + for (unsigned long j = i+1; j < mapped_rects.size(); ++j) + { + if (boxes_overlap(mapped_rects[i], mapped_rects[j])) + { + const double area_overlap = mapped_rects[i].intersect(mapped_rects[j]).area(); + const double match_amount = area_overlap/(double)( mapped_rects[i]+mapped_rects[j]).area(); + const double overlap_amount = area_overlap/std::min(mapped_rects[i].area(),mapped_rects[j].area()); + + using namespace std; + ostringstream sout; + sout << "An impossible set of object labels was detected. This is happening because "; + sout << "the truth labels for an image contain rectangles which overlap according to the "; + sout << "test_box_overlap object supplied for non-max suppression. To resolve this, you "; + sout << "either need to relax the test_box_overlap object so it doesn't mark these rectangles as "; + sout << "overlapping or adjust the truth rectangles in your training dataset. "; + + // make sure the above string fits nicely into a command prompt window. + string temp = sout.str(); + sout.str(""); sout << wrap_string(temp,0,0) << endl << endl; + + + sout << "image index: "<< idx << endl; + sout << "The offending rectangles are:\n"; + sout << "rect1: "<< mapped_rects[i] << endl; + sout << "rect2: "<< mapped_rects[j] << endl; + sout << "match amount: " << match_amount << endl; + sout << "overlap amount: " << overlap_amount << endl; + throw dlib::impossible_labeling_error(sout.str()); + } + } + } + + // make sure the mapped rectangles are within match_eps of the + // truth rectangles. 
+ for (unsigned long i = 0; i < mapped_rects.size(); ++i) + { + const double area = (truth_object_detections[idx][i].get_rect().intersect(mapped_rects[i])).area(); + const double total_area = (truth_object_detections[idx][i].get_rect() + mapped_rects[i]).area(); + if (area/total_area <= match_eps) + { + using namespace std; + ostringstream sout; + sout << "An impossible set of object labels was detected. This is happening because "; + sout << "none of the object locations checked by the supplied image scanner is a close "; + sout << "enough match to one of the truth boxes in your training dataset. To resolve this "; + sout << "you need to either lower the match_eps, adjust the settings of the image scanner "; + sout << "so that it is capable of hitting this truth box, or adjust the offending truth rectangle so it "; + sout << "can be matched by the current image scanner. Also, if you "; + sout << "are using the scan_fhog_pyramid object then you could try using a finer image pyramid. "; + sout << "Additionally, the scan_fhog_pyramid scans a fixed aspect ratio box across the image when it "; + sout << "searches for objects. So if you are getting this error and you are using the scan_fhog_pyramid, "; + sout << "it's very likely the problem is that your training dataset contains truth rectangles of widely "; + sout << "varying aspect ratios. The solution is to make sure your training boxes all have about the same aspect ratio. "; + + + // make sure the above string fits nicely into a command prompt window. 
+ string temp = sout.str(); + sout.str(""); sout << wrap_string(temp,0,0) << endl << endl; + + sout << "image index "<< idx << endl; + sout << "match_eps: "<< match_eps << endl; + sout << "best possible match: "<< area/total_area << endl; + sout << "truth rect: "<< truth_object_detections[idx][i].get_rect() << endl; + sout << "truth rect width/height: "<< truth_object_detections[idx][i].get_rect().width()/(double)truth_object_detections[idx][i].get_rect().height() << endl; + sout << "truth rect area: "<< truth_object_detections[idx][i].get_rect().area() << endl; + sout << "nearest detection template rect: "<< mapped_rects[i] << endl; + sout << "nearest detection template rect width/height: "<< mapped_rects[i].width()/(double)mapped_rects[i].height() << endl; + sout << "nearest detection template rect area: "<< mapped_rects[i].area() << endl; + throw dlib::impossible_labeling_error(sout.str()); + } + + } + } + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const + { + const image_scanner_type& scanner = scanners[idx]; + + std::vector<std::pair<double, rectangle> > dets; + const double thresh = current_solution(scanner.get_num_dimensions()); + + + scanner.detect(current_solution, dets, thresh-loss_per_false_alarm); + + + // The loss will measure the number of incorrect detections. A detection is + // incorrect if it doesn't hit a truth rectangle or if it is a duplicate detection + // on a truth rectangle. + loss = truth_object_detections[idx].size()*loss_per_missed_target; + + // Measure the loss augmented score for the detections which hit a truth rect. + std::vector<double> truth_score_hits(truth_object_detections[idx].size(), 0); + + // keep track of which truth boxes we have hit so far. 
+ std::vector<bool> hit_truth_table(truth_object_detections[idx].size(), false); + + std::vector<rectangle> final_dets; + // The point of this loop is to fill out the truth_score_hits array. + for (unsigned long i = 0; i < dets.size() && final_dets.size() < max_num_dets; ++i) + { + if (overlaps_any_box(boxes_overlap, final_dets, dets[i].second)) + continue; + + const std::pair<double,unsigned int> truth = find_best_match(truth_object_detections[idx], dets[i].second); + + final_dets.push_back(dets[i].second); + + const double truth_match = truth.first; + // if hit truth rect + if (truth_match > match_eps) + { + // if this is the first time we have seen a detect which hit truth_object_detections[idx][truth.second] + const double score = dets[i].first - thresh; + if (hit_truth_table[truth.second] == false) + { + hit_truth_table[truth.second] = true; + truth_score_hits[truth.second] += score; + } + else + { + truth_score_hits[truth.second] += score + loss_per_false_alarm; + } + } + } + + hit_truth_table.assign(hit_truth_table.size(), false); + + final_dets.clear(); +#ifdef ENABLE_ASSERTS + double total_score = 0; +#endif + // Now figure out which detections jointly maximize the loss and detection score sum. We + // need to take into account the fact that allowing a true detection in the output, while + // initially reducing the loss, may allow us to increase the loss later with many duplicate + // detections. 
+ for (unsigned long i = 0; i < dets.size() && final_dets.size() < max_num_dets; ++i) + { + if (overlaps_any_box(boxes_overlap, final_dets, dets[i].second)) + continue; + + const std::pair<double,unsigned int> truth = find_best_match(truth_object_detections[idx], dets[i].second); + + const double truth_match = truth.first; + if (truth_match > match_eps) + { + if (truth_score_hits[truth.second] > loss_per_missed_target) + { + if (!hit_truth_table[truth.second]) + { + hit_truth_table[truth.second] = true; + final_dets.push_back(dets[i].second); +#ifdef ENABLE_ASSERTS + total_score += dets[i].first; +#endif + loss -= loss_per_missed_target; + } + else + { + final_dets.push_back(dets[i].second); +#ifdef ENABLE_ASSERTS + total_score += dets[i].first; +#endif + loss += loss_per_false_alarm; + } + } + } + else if (!overlaps_ignore_box(idx,dets[i].second)) + { + // didn't hit anything + final_dets.push_back(dets[i].second); +#ifdef ENABLE_ASSERTS + total_score += dets[i].first; +#endif + loss += loss_per_false_alarm; + } + } + + psi.set_size(get_num_dimensions()); + psi = 0; + for (unsigned long i = 0; i < final_dets.size(); ++i) + scanner.get_feature_vector(scanner.get_full_object_detection(final_dets[i], current_solution), psi); + +#ifdef ENABLE_ASSERTS + const double psi_score = dot(psi, current_solution); + DLIB_CASSERT(std::abs(psi_score-total_score) <= 1e-4 * std::max(1.0,std::max(std::abs(psi_score),std::abs(total_score))), + "\t The get_feature_vector() and detect() methods of image_scanner_type are not in sync." + << "\n\t The relative error is too large to be attributed to rounding error." 
+ << "\n\t error: " << std::abs(psi_score-total_score) + << "\n\t psi_score: " << psi_score + << "\n\t total_score: " << total_score + ); +#endif + + psi(scanner.get_num_dimensions()) = -1.0*final_dets.size(); + } + + + bool overlaps_ignore_box ( + const long idx, + const dlib::rectangle& rect + ) const + { + for (unsigned long i = 0; i < ignore[idx].size(); ++i) + { + if (ignore_overlap_tester(ignore[idx][i], rect)) + return true; + } + return false; + } + + std::pair<double,unsigned int> find_best_match( + const std::vector<full_object_detection>& boxes, + const rectangle rect + ) const + /*! + ensures + - determines which rectangle in boxes matches rect the most and + returns the amount of this match. Specifically, the match is + a number O with the following properties: + - 0 <= O <= 1 + - Let R be the maximum matching rectangle in boxes, then + O == (R.intersect(rect)).area() / (R + rect).area() + - O == 0 if there is no match with any rectangle. + !*/ + { + double match = 0; + unsigned int best_idx = 0; + for (unsigned long i = 0; i < boxes.size(); ++i) + { + + const unsigned long area = rect.intersect(boxes[i].get_rect()).area(); + if (area != 0) + { + const double new_match = area / static_cast<double>((rect + boxes[i].get_rect()).area()); + if (new_match > match) + { + match = new_match; + best_idx = i; + } + } + } + + return std::make_pair(match,best_idx); + } + + struct init_scanners_helper + { + init_scanners_helper ( + array<image_scanner_type>& scanners_, + const image_array_type& images_ + ) : + scanners(scanners_), + images(images_) + {} + + array<image_scanner_type>& scanners; + const image_array_type& images; + + void operator() (long i ) const + { + scanners[i].load(images[i]); + } + }; + + void initialize_scanners ( + const image_scanner_type& scanner, + unsigned long num_threads + ) + { + scanners.set_max_size(images.size()); + scanners.set_size(images.size()); + + for (unsigned long i = 0; i < scanners.size(); ++i) + 
scanners[i].copy_configuration(scanner); + + // now load the images into all the scanners + parallel_for(num_threads, 0, scanners.size(), init_scanners_helper(scanners, images)); + } + + + test_box_overlap boxes_overlap; + + mutable array<image_scanner_type> scanners; + + const image_array_type& images; + const std::vector<std::vector<full_object_detection> >& truth_object_detections; + const std::vector<std::vector<rectangle> >& ignore; + const test_box_overlap ignore_overlap_tester; + + unsigned long max_num_dets; + double match_eps; + double loss_per_false_alarm; + double loss_per_missed_target; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_svm_object_detection_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_object_detection_problem_abstract.h new file mode 100644 index 000000000..d73c5920d --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_object_detection_problem_abstract.h @@ -0,0 +1,178 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_ABSTRACT_Hh_ + +#include "../matrix.h" +#include "structural_svm_problem_threaded_abstract.h" +#include <sstream> +#include "../image_processing/full_object_detection_abstract.h" +#include "../image_processing/box_overlap_testing.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename image_scanner_type, + typename image_array_type + > + class structural_svm_object_detection_problem : public structural_svm_problem_threaded<matrix<double,0,1> >, + noncopyable + { + /*! 
+ REQUIREMENTS ON image_scanner_type + image_scanner_type must be an implementation of + dlib/image_processing/scan_fhog_pyramid_abstract.h or + dlib/image_processing/scan_image_custom_abstract.h or + dlib/image_processing/scan_image_pyramid_abstract.h or + dlib/image_processing/scan_image_boxes_abstract.h + + REQUIREMENTS ON image_array_type + image_array_type must be an implementation of dlib/array/array_kernel_abstract.h + and it must contain objects which can be accepted by image_scanner_type::load(). + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning the parameter vector needed to use a + scan_image_pyramid, scan_fhog_pyramid, scan_image_custom, or + scan_image_boxes object. + + It learns the parameter vector by formulating the problem as a structural + SVM problem. The exact details of the method are described in the paper + Max-Margin Object Detection by Davis E. King (http://arxiv.org/abs/1502.00046). + + + !*/ + + public: + + structural_svm_object_detection_problem( + const image_scanner_type& scanner, + const test_box_overlap& overlap_tester, + const bool auto_overlap_tester, + const image_array_type& images, + const std::vector<std::vector<full_object_detection> >& truth_object_detections, + const std::vector<std::vector<rectangle> >& ignore, + const test_box_overlap& ignore_overlap_tester, + unsigned long num_threads = 2 + ); + /*! + requires + - is_learning_problem(images, truth_object_detections) + - ignore.size() == images.size() + - scanner.get_num_detection_templates() > 0 + - scanner.load(images[0]) must be a valid expression. + - for all valid i, j: + - truth_object_detections[i][j].num_parts() == scanner.get_num_movable_components_per_detection_template() + - all_parts_in_rect(truth_object_detections[i][j]) == true + ensures + - This object attempts to learn a mapping from the given images to the + object locations given in truth_object_detections. 
In particular, it + attempts to learn to predict truth_object_detections[i] based on + images[i]. Or in other words, this object can be used to learn a + parameter vector, w, such that an object_detector declared as: + object_detector<image_scanner_type> detector(scanner,get_overlap_tester(),w) + results in a detector object which attempts to compute the locations of + all the objects in truth_object_detections. So if you called + detector(images[i]) you would hopefully get a list of rectangles back + that had truth_object_detections[i].size() elements and contained exactly + the rectangles indicated by truth_object_detections[i]. + - if (auto_overlap_tester == true) then + - #get_overlap_tester() == a test_box_overlap object that is configured + using the find_tight_overlap_tester() routine and the contents of + truth_object_detections. + - else + - #get_overlap_tester() == overlap_tester + - #get_match_eps() == 0.5 + - This object will use num_threads threads during the optimization + procedure. You should set this parameter equal to the number of + available processing cores on your machine. + - #get_loss_per_missed_target() == 1 + - #get_loss_per_false_alarm() == 1 + - for all valid i: + - Within images[i] any detections that match against a rectangle in + ignore[i], according to ignore_overlap_tester, are ignored. That is, + the optimizer doesn't care if the detector outputs a detection that + matches any of the ignore rectangles or if it fails to output a + detection for an ignore rectangle. Therefore, if there are objects + in your dataset that you are unsure you want to detect or otherwise + don't care if the detector gets or doesn't then you can mark them + with ignore rectangles and the optimizer will simply ignore them. + !*/ + + test_box_overlap get_overlap_tester ( + ) const; + /*! + ensures + - returns the overlap tester used by this object. + !*/ + + void set_match_eps ( + double eps + ); + /*! 
+ requires + - 0 < eps < 1 + ensures + - #get_match_eps() == eps + !*/ + + double get_match_eps ( + ) const; + /*! + ensures + - returns the amount of alignment necessary for a detection to be considered + as matching with a ground truth rectangle. The precise formula for determining + if two rectangles match each other is the following, rectangles A and B match + if and only if: + A.intersect(B).area()/(A+B).area() > get_match_eps() + !*/ + + double get_loss_per_missed_target ( + ) const; + /*! + ensures + - returns the amount of loss experienced for failing to detect one of the + targets. + !*/ + + void set_loss_per_missed_target ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_missed_target() == loss + !*/ + + double get_loss_per_false_alarm ( + ) const; + /*! + ensures + - returns the amount of loss experienced for emitting a false alarm detection. + Or in other words, the loss for generating a detection that doesn't correspond + to one of the truth rectangles. + !*/ + + void set_loss_per_false_alarm ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_false_alarm() == loss + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_ObJECT_DETECTION_PROBLEM_ABSTRACT_Hh_ + + + diff --git a/ml/dlib/dlib/svm/structural_svm_problem.h b/ml/dlib/dlib/svm/structural_svm_problem.h new file mode 100644 index 000000000..3a73457b9 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_problem.h @@ -0,0 +1,649 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_STRUCTURAL_SVM_PRObLEM_Hh_ +#define DLIB_STRUCTURAL_SVM_PRObLEM_Hh_ + +#include "structural_svm_problem_abstract.h" +#include "../algs.h" +#include <vector> +#include "../optimization/optimization_oca.h" +#include "../matrix.h" +#include "sparse_vector.h" +#include <iostream> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + struct nuclear_norm_regularizer + { + long first_dimension; + long nr; + long nc; + double regularization_strength; + }; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename structural_svm_problem + > + class cache_element_structural_svm + { + public: + + cache_element_structural_svm ( + ) : prob(0), sample_idx(0), last_true_risk_computed(std::numeric_limits<double>::infinity()) {} + + typedef typename structural_svm_problem::scalar_type scalar_type; + typedef typename structural_svm_problem::matrix_type matrix_type; + typedef typename structural_svm_problem::feature_vector_type feature_vector_type; + + void init ( + const structural_svm_problem* prob_, + const long idx + ) + /*! + ensures + - This object will be a cache for the idx-th sample in the given + structural_svm_problem. 
+ !*/ + { + prob = prob_; + sample_idx = idx; + + loss.clear(); + psi.clear(); + lru_count.clear(); + + if (prob->get_max_cache_size() != 0) + { + prob->get_truth_joint_feature_vector(idx, true_psi); + compact_sparse_vector(true_psi); + } + } + + void get_truth_joint_feature_vector_cached ( + feature_vector_type& psi + ) const + { + if (prob->get_max_cache_size() != 0) + psi = true_psi; + else + prob->get_truth_joint_feature_vector(sample_idx, psi); + + if (is_matrix<feature_vector_type>::value) + { + DLIB_CASSERT((long)psi.size() == prob->get_num_dimensions(), + "The dimensionality of your PSI vector doesn't match get_num_dimensions()"); + } + } + + void separation_oracle_cached ( + const bool use_only_cache, + const bool skip_cache, + const scalar_type& saved_current_risk_gap, + const matrix_type& current_solution, + scalar_type& out_loss, + feature_vector_type& out_psi + ) const + { + const bool cache_enabled = prob->get_max_cache_size() != 0; + + // Don't waste time computing this if the cache isn't going to be used. + const scalar_type dot_true_psi = cache_enabled ? dot(true_psi, current_solution) : 0; + + scalar_type best_risk = -std::numeric_limits<scalar_type>::infinity(); + unsigned long best_idx = 0; + long max_lru_count = 0; + if (cache_enabled) + { + // figure out which element in the cache is the best (i.e. has the biggest risk) + for (unsigned long i = 0; i < loss.size(); ++i) + { + const scalar_type risk = loss[i] + dot(psi[i], current_solution) - dot_true_psi; + if (risk > best_risk) + { + best_risk = risk; + out_loss = loss[i]; + best_idx = i; + } + if (lru_count[i] > max_lru_count) + max_lru_count = lru_count[i]; + } + + if (!skip_cache) + { + // Check if the best psi vector in the cache is still good enough to use as + // a proxy for the true separation oracle. If the risk value has dropped + // by enough to get into the stopping condition then the best psi isn't + // good enough. 
+ if ((best_risk + saved_current_risk_gap > last_true_risk_computed && + best_risk >= 0) || use_only_cache) + { + out_psi = psi[best_idx]; + lru_count[best_idx] = max_lru_count + 1; + return; + } + } + } + + + prob->separation_oracle(sample_idx, current_solution, out_loss, out_psi); + if (is_matrix<feature_vector_type>::value) + { + DLIB_CASSERT((long)out_psi.size() == prob->get_num_dimensions(), + "The dimensionality of your PSI vector doesn't match get_num_dimensions()"); + } + + if (!cache_enabled) + return; + + compact_sparse_vector(out_psi); + + last_true_risk_computed = out_loss + dot(out_psi, current_solution) - dot_true_psi; + + // If the separation oracle is only solved approximately then the result might + // not be as good as just selecting true_psi as the output. So here we check + // if that is the case. + if (last_true_risk_computed < 0 && best_risk < 0) + { + out_psi = true_psi; + out_loss = 0; + } + // Alternatively, an approximate separation oracle might not do as well as just + // selecting from the cache. So if that is the case when just take the best + // element from the cache. + else if (last_true_risk_computed < best_risk) + { + out_psi = psi[best_idx]; + out_loss = loss[best_idx]; + lru_count[best_idx] = max_lru_count + 1; + } + // if the cache is full + else if (loss.size() >= prob->get_max_cache_size()) + { + // find least recently used cache entry for idx-th sample + const long i = index_of_min(mat(lru_count)); + + // save our new data in the cache + loss[i] = out_loss; + psi[i] = out_psi; + + const long max_use = max(mat(lru_count)); + // Make sure this new cache entry has the best lru count since we have used + // it most recently. + lru_count[i] = max_use + 1; + } + else + { + // In this case we just append the new psi into the cache. 
+ + loss.push_back(out_loss); + psi.push_back(out_psi); + long max_use = 1; + if (lru_count.size() != 0) + max_use = max(mat(lru_count)) + 1; + lru_count.push_back(max_use); + } + } + + private: + // Do nothing if T isn't actually a sparse vector + template <typename T> void compact_sparse_vector( T& ) const { } + + template < + typename T, + typename U, + typename alloc + > + void compact_sparse_vector ( + std::vector<std::pair<T,U>,alloc>& vect + ) const + { + // If the sparse vector has more entires than dimensions then it must have some + // duplicate elements. So compact them using make_sparse_vector_inplace(). + if (vect.size() > (unsigned long)prob->get_num_dimensions()) + { + make_sparse_vector_inplace(vect); + // make sure the vector doesn't use more RAM than is necessary + std::vector<std::pair<T,U>,alloc>(vect).swap(vect); + } + } + + const structural_svm_problem* prob; + + long sample_idx; + + mutable feature_vector_type true_psi; + mutable std::vector<scalar_type> loss; + mutable std::vector<feature_vector_type> psi; + mutable std::vector<long> lru_count; + mutable double last_true_risk_computed; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type_, + typename feature_vector_type_ = matrix_type_ + > + class structural_svm_problem : public oca_problem<matrix_type_> + { + public: + /*! + CONVENTION + - C == get_c() + - eps == get_epsilon() + - max_iterations == get_max_iterations() + - if (skip_cache) then + - we won't use the oracle cache when we need to evaluate the separation + oracle. Instead, we will directly call the user supplied separation_oracle(). + + - get_max_cache_size() == max_cache_size + + - if (cache.size() != 0) then + - cache.size() == get_num_samples() + - for all i: cache[i] == the cached results of calls to separation_oracle() + for the i-th sample. 
+ !*/ + + typedef matrix_type_ matrix_type; + typedef typename matrix_type::type scalar_type; + typedef feature_vector_type_ feature_vector_type; + + structural_svm_problem ( + ) : + saved_current_risk_gap(0), + eps(0.001), + max_iterations(10000), + verbose(false), + skip_cache(true), + count_below_eps(0), + max_cache_size(5), + converged(false), + nuclear_norm_part(0), + cache_based_eps(std::numeric_limits<scalar_type>::infinity()), + C(1) + {} + + scalar_type get_cache_based_epsilon ( + ) const + { + return cache_based_eps; + } + + void set_cache_based_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void structural_svm_problem::set_cache_based_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + cache_based_eps = eps_; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void structural_svm_problem::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const { return eps; } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void set_max_cache_size ( + unsigned long max_size + ) + { + max_cache_size = max_size; + } + + unsigned long get_max_cache_size ( + ) const { return max_cache_size; } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet( + ) + { + verbose = false; + } + + scalar_type get_c ( + ) const { return C; } + + void set_c ( + scalar_type C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_svm_problem::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + void 
add_nuclear_norm_regularizer ( + long first_dimension, + long rows, + long cols, + double regularization_strength + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 <= first_dimension && first_dimension < get_num_dimensions() && + 0 <= rows && 0 <= cols && rows*cols+first_dimension <= get_num_dimensions() && + 0 < regularization_strength, + "\t void structural_svm_problem::add_nuclear_norm_regularizer()" + << "\n\t Invalid arguments were given to this function." + << "\n\t first_dimension: " << first_dimension + << "\n\t rows: " << rows + << "\n\t cols: " << cols + << "\n\t get_num_dimensions(): " << get_num_dimensions() + << "\n\t regularization_strength: " << regularization_strength + << "\n\t this: " << this + ); + + impl::nuclear_norm_regularizer temp; + temp.first_dimension = first_dimension; + temp.nr = rows; + temp.nc = cols; + temp.regularization_strength = regularization_strength; + nuclear_norm_regularizers.push_back(temp); + } + + unsigned long num_nuclear_norm_regularizers ( + ) const { return nuclear_norm_regularizers.size(); } + + void clear_nuclear_norm_regularizers ( + ) { nuclear_norm_regularizers.clear(); } + + virtual long get_num_dimensions ( + ) const = 0; + + virtual long get_num_samples ( + ) const = 0; + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const = 0; + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const = 0; + + private: + + virtual bool risk_has_lower_bound ( + scalar_type& lower_bound + ) const + { + lower_bound = 0; + return true; + } + + virtual bool optimization_status ( + scalar_type current_objective_value, + scalar_type current_error_gap, + scalar_type current_risk_value, + scalar_type current_risk_gap, + unsigned long num_cutting_planes, + unsigned long num_iterations + ) const + { + if (verbose) + { + using namespace std; + if (nuclear_norm_regularizers.size() != 
0) + { + cout << "objective: " << current_objective_value << endl; + cout << "objective gap: " << current_error_gap << endl; + cout << "risk: " << current_risk_value-nuclear_norm_part << endl; + cout << "risk+nuclear norm: " << current_risk_value << endl; + cout << "risk+nuclear norm gap: " << current_risk_gap << endl; + cout << "num planes: " << num_cutting_planes << endl; + cout << "iter: " << num_iterations << endl; + } + else + { + cout << "objective: " << current_objective_value << endl; + cout << "objective gap: " << current_error_gap << endl; + cout << "risk: " << current_risk_value << endl; + cout << "risk gap: " << current_risk_gap << endl; + cout << "num planes: " << num_cutting_planes << endl; + cout << "iter: " << num_iterations << endl; + } + cout << endl; + } + + if (num_iterations >= max_iterations) + return true; + + saved_current_risk_gap = current_risk_gap; + + if (converged) + { + return (current_risk_gap < std::max(cache_based_eps,cache_based_eps*current_risk_value)) || + (current_risk_gap == 0); + } + + if (current_risk_gap < eps) + { + // Only stop when we see that the risk gap is small enough on a non-cached + // iteration. But even then, if we are supposed to do the cache based + // refinement then we just mark that we have "converged" to avoid further + // calls to the separation oracle and run all subsequent iterations off the + // cache. + if (skip_cache || max_cache_size == 0) + { + converged = true; + skip_cache = false; + return (current_risk_gap < std::max(cache_based_eps,cache_based_eps*current_risk_value)) || + (current_risk_gap == 0); + } + + ++count_below_eps; + + // Only disable the cache if we have seen a few consecutive iterations that + // look to have converged. + if (count_below_eps > 1) + { + // Instead of stopping we shouldn't use the cache on the next iteration. This way + // we can be sure to have the best solution rather than assuming the cache is up-to-date + // enough. 
+ skip_cache = true; + count_below_eps = 0; + } + } + else + { + count_below_eps = 0; + skip_cache = false; + } + + return false; + } + + virtual void get_risk ( + matrix_type& w, + scalar_type& risk, + matrix_type& subgradient + ) const + { + feature_vector_type ftemp; + const unsigned long num = get_num_samples(); + + // initialize the cache and compute psi_true. + if (cache.size() == 0) + { + cache.resize(get_num_samples()); + for (unsigned long i = 0; i < cache.size(); ++i) + cache[i].init(this,i); + + psi_true.set_size(w.size(),1); + psi_true = 0; + + for (unsigned long i = 0; i < num; ++i) + { + cache[i].get_truth_joint_feature_vector_cached(ftemp); + + subtract_from(psi_true, ftemp); + } + } + + subgradient = psi_true; + scalar_type total_loss = 0; + call_separation_oracle_on_all_samples(w,subgradient,total_loss); + + subgradient /= num; + total_loss /= num; + risk = total_loss + dot(subgradient,w); + + if (nuclear_norm_regularizers.size() != 0) + { + matrix_type grad; + scalar_type obj; + compute_nuclear_norm_parts(w, grad, obj); + risk += obj; + subgradient += grad; + } + } + + virtual void call_separation_oracle_on_all_samples ( + const matrix_type& w, + matrix_type& subgradient, + scalar_type& total_loss + ) const + { + feature_vector_type ftemp; + const unsigned long num = get_num_samples(); + for (unsigned long i = 0; i < num; ++i) + { + scalar_type loss; + separation_oracle_cached(i, w, loss, ftemp); + total_loss += loss; + add_to(subgradient, ftemp); + } + } + + protected: + + void compute_nuclear_norm_parts( + const matrix_type& m, + matrix_type& grad, + scalar_type& obj + ) const + { + obj = 0; + grad.set_size(m.size(), 1); + grad = 0; + + matrix<double> u,v,w,f; + nuclear_norm_part = 0; + for (unsigned long i = 0; i < nuclear_norm_regularizers.size(); ++i) + { + const long nr = nuclear_norm_regularizers[i].nr; + const long nc = nuclear_norm_regularizers[i].nc; + const long size = nr*nc; + const long idx = 
nuclear_norm_regularizers[i].first_dimension; + const double strength = nuclear_norm_regularizers[i].regularization_strength; + + f = matrix_cast<double>(reshape(rowm(m, range(idx, idx+size-1)), nr, nc)); + svd3(f, u,w,v); + + + const double norm = sum(w); + obj += strength*norm; + nuclear_norm_part += strength*norm/C; + + f = u*trans(v); + + set_rowm(grad, range(idx, idx+size-1)) = matrix_cast<double>(strength*reshape_to_column_vector(f)); + } + + obj /= C; + grad /= C; + } + + void separation_oracle_cached ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const + { + cache[idx].separation_oracle_cached(converged, + skip_cache, + saved_current_risk_gap, + current_solution, + loss, + psi); + } + + std::vector<impl::nuclear_norm_regularizer> nuclear_norm_regularizers; + + mutable scalar_type saved_current_risk_gap; + mutable matrix_type psi_true; + scalar_type eps; + unsigned long max_iterations; + mutable bool verbose; + + + mutable std::vector<cache_element_structural_svm<structural_svm_problem> > cache; + mutable bool skip_cache; + mutable int count_below_eps; + unsigned long max_cache_size; + mutable bool converged; + mutable double nuclear_norm_part; + scalar_type cache_based_eps; + + scalar_type C; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_PRObLEM_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_svm_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_problem_abstract.h new file mode 100644 index 000000000..20b3d73a7 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_problem_abstract.h @@ -0,0 +1,348 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_ + +#include "../optimization/optimization_oca_abstract.h" +#include "sparse_vector_abstract.h" +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type_, + typename feature_vector_type_ = matrix_type_ + > + class structural_svm_problem : public oca_problem<matrix_type_> + { + public: + /*! + REQUIREMENTS ON matrix_type_ + - matrix_type_ == a dlib::matrix capable of storing column vectors + + REQUIREMENTS ON feature_vector_type_ + - feature_vector_type_ == a dlib::matrix capable of storing column vectors + or an unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. + + INITIAL VALUE + - get_epsilon() == 0.001 + - get_max_iterations() == 10000 + - get_max_cache_size() == 5 + - get_c() == 1 + - get_cache_based_epsilon() == std::numeric_limits<scalar_type>::infinity() + (I.e. the cache based epsilon feature is disabled) + - num_nuclear_norm_regularizers() == 0 + - This object will not be verbose + + WHAT THIS OBJECT REPRESENTS + This object is a tool for solving the optimization problem associated with + a structural support vector machine. A structural SVM is a supervised + machine learning method for learning to predict complex outputs. This is + contrasted with a binary classifier which makes only simple yes/no + predictions. A structural SVM, on the other hand, can learn to predict + complex outputs such as entire parse trees or DNA sequence alignments. To + do this, it learns a function F(x,y) which measures how well a particular + data sample x matches a label y. When used for prediction, the best label + for a new x is given by the y which maximizes F(x,y). + + To use this object you inherit from it, provide implementations of its four + pure virtual functions, and then pass your object to the oca optimizer. 
+ Also, you should only pass an instance of this object to the oca optimizer + once. That is, the act of using a structural_svm_problem instance with the + oca solver "uses" the structural_svm_problem instance. If you want to + solve the same problem multiple times then you must use a fresh instance of + your structural_svm_problem. + + + To define the optimization problem precisely, we first introduce some notation: + - let PSI(x,y) == the joint feature vector for input x and a label y. + - let F(x,y|w) == dot(w,PSI(x,y)). + - let LOSS(idx,y) == the loss incurred for predicting that the idx-th training + sample has a label of y. Note that LOSS() should always be >= 0 and should + become exactly 0 when y is the correct label for the idx-th sample. + - let x_i == the i-th training sample. + - let y_i == the correct label for the i-th training sample. + - The number of data samples is N. + + Then the optimization problem solved using this object is the following: + Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + + Where R(w) == sum from i=1 to N: 1/N * sample_risk(i,w) + and sample_risk(i,w) == max over all Y: LOSS(i,Y) + F(x_i,Y|w) - F(x_i,y_i|w) + and C > 0 + + + + For an introduction to structured support vector machines you should consult + the following paper: + Predicting Structured Objects with Support Vector Machines by + Thorsten Joachims, Thomas Hofmann, Yisong Yue, and Chun-nam Yu + + For a more detailed discussion of the particular algorithm implemented by this + object see the following paper: + T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of Structural SVMs, + Machine Learning, 77(1):27-59, 2009. + + Note that this object is essentially a tool for solving the 1-Slack structural + SVM with margin-rescaling. Specifically, see Algorithm 3 in the above referenced + paper. 
+ !*/ + + typedef matrix_type_ matrix_type; + typedef typename matrix_type::type scalar_type; + typedef feature_vector_type_ feature_vector_type; + + structural_svm_problem ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to execute. Specifically, the algorithm stops when the average sample + risk (i.e. R(w) as defined above) is within epsilon of its optimal value. + + Also note that sample risk is an upper bound on a sample's loss. So + you can think of this epsilon value as saying "solve the optimization + problem until the average loss per sample is within epsilon of its + optimal value". + !*/ + + scalar_type get_cache_based_epsilon ( + ) const; + /*! + ensures + - if (get_max_cache_size() != 0) then + - The solver will not stop when the average sample risk is within + get_epsilon() of its optimal value. Instead, it will keep running + but will run the optimizer completely on the cache until the average + sample risk is within #get_cache_based_epsilon() of its optimal + value. This means that it will perform this additional refinement in + the solution accuracy without making any additional calls to the + separation_oracle(). This is useful when using a nuclear norm + regularization term because it allows you to quickly solve the + optimization problem to a high precision, which in the case of a + nuclear norm regularized problem means that many of the learned + matrices will be low rank or very close to low rank due to the + nuclear norm regularizer. 
This may not happen without solving the + problem to a high accuracy or their ranks may be difficult to + determine, so the extra accuracy given by the cache based refinement + is very useful. Finally, note that we include the nuclear norm term + as part of the "risk" for the purposes of determining when to stop. + - else + - The value of #get_cache_based_epsilon() has no effect. + !*/ + + void set_cache_based_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_cache_based_epsilon() == eps + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - Returns the number of joint feature vectors per training sample kept in + the separation oracle cache. This cache is used to avoid unnecessary + calls to the user supplied separation_oracle() function. Note that a + value of 0 means that caching is not used at all. This is appropriate + if the separation oracle is cheap to evaluate. + !*/ + + void add_nuclear_norm_regularizer ( + long first_dimension, + long rows, + long cols, + double regularization_strength + ); + /*! + requires + - 0 <= first_dimension < get_num_dimensions() + - 0 <= rows + - 0 <= cols + - first_dimension+rows*cols <= get_num_dimensions() + - 0 < regularization_strength + ensures + - Adds a nuclear norm regularization term to the optimization problem + solved by this object. 
That is, instead of solving: + Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + this object will solve: + Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w) + where "part of w" is the part of w indicated by the arguments to this + function. In particular, the part of w included in the nuclear norm is + exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols). + Therefore, if you think of the w vector as being the concatenation of a + bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() + to add nuclear norm regularization terms to any of the matrices packed into w. + - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1 + !*/ + + unsigned long num_nuclear_norm_regularizers ( + ) const; + /*! + ensures + - returns the number of nuclear norm regularizers that are currently a part + of this optimization problem. That is, returns the number of times + add_nuclear_norm_regularizer() has been called since the last call to + clear_nuclear_norm_regularizers() or object construction, whichever is + most recent. + !*/ + + void clear_nuclear_norm_regularizers ( + ); + /*! + ensures + - #num_nuclear_norm_regularizers() == 0 + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data + exactly or allowing more errors but hopefully improving the + generalization of the resulting classifier. Larger values encourage + exact fitting while smaller values of C may encourage better + generalization. + !*/ + + void set_c ( + scalar_type C + ); + /*! 
+ requires + - C > 0 + ensures + - #get_c() == C + !*/ + + // -------------------------------- + // User supplied routines + // -------------------------------- + + virtual long get_num_dimensions ( + ) const = 0; + /*! + ensures + - returns the dimensionality of a joint feature vector + !*/ + + virtual long get_num_samples ( + ) const = 0; + /*! + ensures + - returns the number of training samples in this problem. + !*/ + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const = 0; + /*! + requires + - 0 <= idx < get_num_samples() + ensures + - #psi == PSI(x_idx, y_idx) + (i.e. the joint feature vector for the idx-th training sample its true label.) + !*/ + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const = 0; + /*! + requires + - 0 <= idx < get_num_samples() + - current_solution.size() == get_num_dimensions() + ensures + - runs the separation oracle on the idx-th sample. We define this as follows: + - let X == the idx-th training sample. + - let PSI(X,y) == the joint feature vector for input X and an arbitrary label y. + - let F(X,y) == dot(current_solution,PSI(X,y)). + - let LOSS(idx,y) == the loss incurred for predicting that the idx-th sample + has a label of y. Note that LOSS() should always be >= 0 and should + become exactly 0 when y is the correct label for the idx-th sample. + + Then the separation oracle finds a Y such that: + Y = argmax over all y: LOSS(idx,y) + F(X,y) + (i.e. It finds the label which maximizes the above expression.) 
+ + Finally, we can define the outputs of this function as: + - #loss == LOSS(idx,Y) + - #psi == PSI(X,Y) + !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_svm_problem_threaded.h b/ml/dlib/dlib/svm/structural_svm_problem_threaded.h new file mode 100644 index 000000000..e981ba8d9 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_problem_threaded.h @@ -0,0 +1,157 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_ +#define DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_ + +#include "structural_svm_problem_threaded_abstract.h" +#include "../algs.h" +#include <vector> +#include "structural_svm_problem.h" +#include "../matrix.h" +#include "sparse_vector.h" +#include <iostream> +#include "../threads.h" +#include "../misc_api.h" +#include "../statistics.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type_, + typename feature_vector_type_ = matrix_type_ + > + class structural_svm_problem_threaded : public structural_svm_problem<matrix_type_,feature_vector_type_> + { + public: + + typedef matrix_type_ matrix_type; + typedef typename matrix_type::type scalar_type; + typedef feature_vector_type_ feature_vector_type; + + explicit structural_svm_problem_threaded ( + unsigned long num_threads + ) : + tp(num_threads), + num_iterations_executed(0) + {} + + unsigned long get_num_threads ( + ) const { return tp.num_threads_in_pool(); } + + private: + + struct binder + { + binder ( + const structural_svm_problem_threaded& self_, + const matrix_type& w_, + matrix_type& subgradient_, + scalar_type& total_loss_, + bool buffer_subgradients_locally_ + ) : self(self_), w(w_), subgradient(subgradient_), 
total_loss(total_loss_), + buffer_subgradients_locally(buffer_subgradients_locally_){} + + void call_oracle ( + long begin, + long end + ) + { + // If we are only going to call the separation oracle once then don't run + // the slightly more complex for loop version of this code. Or if we just + // don't want to run the complex buffering one. The code later on decides + // if we should do the buffering based on how long it takes to execute. We + // do this because, when the subgradient is really high dimensional it can + // take a lot of time to add them together. So we might want to avoid + // doing that. + if (end-begin <= 1 || !buffer_subgradients_locally) + { + scalar_type loss; + feature_vector_type ftemp; + for (long i = begin; i < end; ++i) + { + self.separation_oracle_cached(i, w, loss, ftemp); + + auto_mutex lock(self.accum_mutex); + total_loss += loss; + add_to(subgradient, ftemp); + } + } + else + { + scalar_type loss = 0; + matrix_type faccum(subgradient.size(),1); + faccum = 0; + + feature_vector_type ftemp; + + for (long i = begin; i < end; ++i) + { + scalar_type loss_temp; + self.separation_oracle_cached(i, w, loss_temp, ftemp); + loss += loss_temp; + add_to(faccum, ftemp); + } + + auto_mutex lock(self.accum_mutex); + total_loss += loss; + add_to(subgradient, faccum); + } + } + + const structural_svm_problem_threaded& self; + const matrix_type& w; + matrix_type& subgradient; + scalar_type& total_loss; + bool buffer_subgradients_locally; + }; + + + virtual void call_separation_oracle_on_all_samples ( + const matrix_type& w, + matrix_type& subgradient, + scalar_type& total_loss + ) const + { + ++num_iterations_executed; + + const uint64 start_time = ts.get_timestamp(); + + bool buffer_subgradients_locally = with_buffer_time.mean() < without_buffer_time.mean(); + + // every 50 iterations we should try to flip the buffering scheme to see if + // doing it the other way might be better. 
+ if ((num_iterations_executed%50) == 0) + { + buffer_subgradients_locally = !buffer_subgradients_locally; + } + + binder b(*this, w, subgradient, total_loss, buffer_subgradients_locally); + parallel_for_blocked(tp, 0, this->get_num_samples(), b, &binder::call_oracle); + + const uint64 stop_time = ts.get_timestamp(); + + if (buffer_subgradients_locally) + with_buffer_time.add(stop_time-start_time); + else + without_buffer_time.add(stop_time-start_time); + + } + + mutable thread_pool tp; + mutable mutex accum_mutex; + mutable timestamper ts; + mutable running_stats<double> with_buffer_time; + mutable running_stats<double> without_buffer_time; + mutable unsigned long num_iterations_executed; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_svm_problem_threaded_abstract.h b/ml/dlib/dlib/svm/structural_svm_problem_threaded_abstract.h new file mode 100644 index 000000000..3cfc6a6eb --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_problem_threaded_abstract.h @@ -0,0 +1,68 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_ABSTRACT_Hh_ + +#include "structural_svm_problem_abstract.h" +#include "../matrix.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type_, + typename feature_vector_type_ = matrix_type_ + > + class structural_svm_problem_threaded : public structural_svm_problem<matrix_type_,feature_vector_type_> + { + public: + /*! 
+ WHAT THIS OBJECT REPRESENTS + This object is identical to the structural_svm_problem object defined in + dlib/svm/structural_svm_problem_abstract.h except that its constructor + takes a number which defines how many threads will be used to make concurrent + calls to the separation_oracle() routine. + + So this object lets you take advantage of a multi-core system. You should + set the num_threads parameter equal to the number of available cores. Note + that the separation_oracle() function which you provide must be thread safe + if you are to use this version of the structural_svm_problem. In + particular, it must be safe to call separation_oracle() concurrently from + different threads. However, it is guaranteed that different threads will + never make concurrent calls to separation_oracle() using the same idx value + (i.e. the first argument). + !*/ + + typedef matrix_type_ matrix_type; + typedef typename matrix_type::type scalar_type; + typedef feature_vector_type_ feature_vector_type; + + structural_svm_problem ( + unsigned long num_threads + ); + /*! + ensures + - this object is properly initialized + - #get_num_threads() == num_threads + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - Returns the number of threads which will be used to make concurrent + calls to the separation_oracle() function. + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_PRObLEM_THREADED_ABSTRACT_Hh_ + + + diff --git a/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem.h b/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem.h new file mode 100644 index 000000000..68dff66f5 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem.h @@ -0,0 +1,281 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_Hh_ +#define DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_Hh_ + + +#include "structural_svm_sequence_labeling_problem_abstract.h" +#include "../matrix.h" +#include "sequence_labeler.h" +#include <vector> +#include "structural_svm_problem_threaded.h" + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + + namespace fe_helpers + { + + // ---------------------------------------------------------------------------------------- + + struct get_feats_functor + { + get_feats_functor(std::vector<std::pair<unsigned long, double> >& feats_) : feats(feats_) {} + + inline void operator() ( + unsigned long feat_index, + double feat_value + ) + { + feats.push_back(std::make_pair(feat_index, feat_value)); + } + + inline void operator() ( + unsigned long feat_index + ) + { + feats.push_back(std::make_pair(feat_index, 1)); + } + + std::vector<std::pair<unsigned long, double> >& feats; + }; + + // ---------------------------------------------------------------------------------------- + + template <typename feature_extractor, typename sequence_type, typename EXP2> + void get_feature_vector( + std::vector<std::pair<unsigned long, double> >& feats, + const feature_extractor& fe, + const sequence_type& sequence, + const matrix_exp<EXP2>& candidate_labeling, + unsigned long position + ) + { + get_feats_functor funct(feats); + fe.get_features(funct, sequence,candidate_labeling, position); + } + + } + +// ---------------------------------------------------------------------------------------- + + template < + typename feature_extractor + > + class structural_svm_sequence_labeling_problem : noncopyable, + public structural_svm_problem_threaded<matrix<double,0,1>, std::vector<std::pair<unsigned long,double> > > + { + public: + typedef matrix<double,0,1> matrix_type; + typedef std::vector<std::pair<unsigned long, double> > feature_vector_type; + + typedef typename 
feature_extractor::sequence_type sequence_type; + + structural_svm_sequence_labeling_problem( + const std::vector<sequence_type>& samples_, + const std::vector<std::vector<unsigned long> >& labels_, + const feature_extractor& fe_, + unsigned long num_threads = 2 + ) : + structural_svm_problem_threaded<matrix_type,feature_vector_type>(num_threads), + samples(samples_), + labels(labels_), + fe(fe_) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_sequence_labeling_problem(samples,labels) == true && + contains_invalid_labeling(fe, samples, labels) == false, + "\t structural_svm_sequence_labeling_problem::structural_svm_sequence_labeling_problem()" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t is_sequence_labeling_problem(samples,labels): " << is_sequence_labeling_problem(samples,labels) + << "\n\t contains_invalid_labeling(fe,samples,labels): " << contains_invalid_labeling(fe,samples,labels) + << "\n\t this: " << this + ); + +#ifdef ENABLE_ASSERTS + for (unsigned long i = 0; i < labels.size(); ++i) + { + for (unsigned long j = 0; j < labels[i].size(); ++j) + { + // make sure requires clause is not broken + DLIB_ASSERT(labels[i][j] < fe.num_labels(), + "\t structural_svm_sequence_labeling_problem::structural_svm_sequence_labeling_problem()" + << "\n\t The given labels in labels are invalid." 
+ << "\n\t labels[i][j]: " << labels[i][j] + << "\n\t fe.num_labels(): " << fe.num_labels() + << "\n\t i: " << i + << "\n\t j: " << j + << "\n\t this: " << this + ); + } + } +#endif + + loss_values.assign(num_labels(), 1); + + } + + unsigned long num_labels ( + ) const { return fe.num_labels(); } + + double get_loss ( + unsigned long label + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(label < num_labels(), + "\t void structural_svm_sequence_labeling_problem::get_loss()" + << "\n\t invalid inputs were given to this function" + << "\n\t label: " << label + << "\n\t num_labels(): " << num_labels() + << "\n\t this: " << this + ); + + return loss_values[label]; + } + + void set_loss ( + unsigned long label, + double value + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(label < num_labels() && value >= 0, + "\t void structural_svm_sequence_labeling_problem::set_loss()" + << "\n\t invalid inputs were given to this function" + << "\n\t label: " << label + << "\n\t num_labels(): " << num_labels() + << "\n\t value: " << value + << "\n\t this: " << this + ); + + loss_values[label] = value; + } + + private: + virtual long get_num_dimensions ( + ) const + { + return fe.num_features(); + } + + virtual long get_num_samples ( + ) const + { + return samples.size(); + } + + void get_joint_feature_vector ( + const sequence_type& sample, + const std::vector<unsigned long>& label, + feature_vector_type& psi + ) const + { + psi.clear(); + + const int order = fe.order(); + + matrix<unsigned long,0,1> candidate_labeling; + for (unsigned long i = 0; i < sample.size(); ++i) + { + candidate_labeling = rowm(mat(label), range(i, std::max((int)i-order,0))); + + fe_helpers::get_feature_vector(psi,fe,sample,candidate_labeling, i); + } + } + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const + { + get_joint_feature_vector(samples[idx], labels[idx], psi); + } + + class map_prob + { + public: + unsigned long 
order() const { return fe.order(); } + unsigned long num_states() const { return fe.num_labels(); } + + map_prob( + const sequence_type& sequence_, + const std::vector<unsigned long>& label_, + const feature_extractor& fe_, + const matrix<double,0,1>& weights_, + const std::vector<double>& loss_values_ + ) : + sequence(sequence_), + label(label_), + fe(fe_), + weights(weights_), + loss_values(loss_values_) + { + } + + unsigned long number_of_nodes( + ) const + { + return sequence.size(); + } + + template < + typename EXP + > + double factor_value ( + unsigned long node_id, + const matrix_exp<EXP>& node_states + ) const + { + if (dlib::impl::call_reject_labeling_if_exists(fe, sequence, node_states, node_id)) + return -std::numeric_limits<double>::infinity(); + + double loss = 0; + if (node_states(0) != label[node_id]) + loss = loss_values[label[node_id]]; + + return fe_helpers::dot(weights, fe, sequence, node_states, node_id) + loss; + } + + const sequence_type& sequence; + const std::vector<unsigned long>& label; + const feature_extractor& fe; + const matrix<double,0,1>& weights; + const std::vector<double>& loss_values; + }; + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const + { + std::vector<unsigned long> y; + find_max_factor_graph_viterbi(map_prob(samples[idx],labels[idx],fe,current_solution,loss_values), y); + + loss = 0; + for (unsigned long i = 0; i < y.size(); ++i) + { + if (y[i] != labels[idx][i]) + loss += loss_values[labels[idx][i]]; + } + + get_joint_feature_vector(samples[idx], y, psi); + } + + const std::vector<sequence_type>& samples; + const std::vector<std::vector<unsigned long> >& labels; + const feature_extractor& fe; + std::vector<double> loss_values; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_Hh_ + diff --git 
a/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem_abstract.h new file mode 100644 index 000000000..b46a55350 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_svm_sequence_labeling_problem_abstract.h @@ -0,0 +1,110 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_Hh_ + + +#include "../matrix.h" +#include <vector> +#include "structural_svm_problem_threaded_abstract.h" +#include "sequence_labeler_abstract.h" + +// ---------------------------------------------------------------------------------------- + +namespace dlib +{ + + template < + typename feature_extractor + > + class structural_svm_sequence_labeling_problem : noncopyable, + public structural_svm_problem_threaded<matrix<double,0,1>, + std::vector<std::pair<unsigned long,double> > > + { + /*! + REQUIREMENTS ON feature_extractor + It must be an object that implements an interface compatible with + the example_feature_extractor defined in dlib/svm/sequence_labeler_abstract.h. + + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning the weight vector needed to use + a sequence_labeler object. + + It learns the parameter vector by formulating the problem as a structural + SVM problem. The general approach is discussed in the paper: + Hidden Markov Support Vector Machines by + Y. Altun, I. Tsochantaridis, T. Hofmann + While the particular optimization strategy used is the method from: + T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of + Structural SVMs, Machine Learning, 77(1):27-59, 2009. 
+ !*/ + + public: + typedef typename feature_extractor::sequence_type sequence_type; + + structural_svm_sequence_labeling_problem( + const std::vector<sequence_type>& samples, + const std::vector<std::vector<unsigned long> >& labels, + const feature_extractor& fe, + unsigned long num_threads = 2 + ); + /*! + requires + - is_sequence_labeling_problem(samples, labels) == true + - contains_invalid_labeling(fe, samples, labels) == false + - for all valid i and j: labels[i][j] < fe.num_labels() + ensures + - This object attempts to learn a mapping from the given samples to the + given labels. In particular, it attempts to learn to predict labels[i] + based on samples[i]. Or in other words, this object can be used to learn + a parameter vector, w, such that a sequence_labeler declared as: + sequence_labeler<feature_extractor> labeler(w,fe) + results in a labeler object which attempts to compute the following mapping: + labels[i] == labeler(samples[i]) + - This object will use num_threads threads during the optimization + procedure. You should set this parameter equal to the number of + available processing cores on your machine. + - #num_labels() == fe.num_labels() + - for all valid i: #get_loss(i) == 1 + !*/ + + unsigned long num_labels ( + ) const; + /*! + ensures + - returns the number of possible labels in this learning problem + !*/ + + double get_loss ( + unsigned long label + ) const; + /*! + requires + - label < num_labels() + ensures + - returns the loss incurred when a sequence element with the given + label is misclassified. This value controls how much we care about + correctly classifying this type of label. Larger loss values indicate + that we care more strongly than smaller values. + !*/ + + void set_loss ( + unsigned long label, + double value + ); + /*! 
+ requires + - label < num_labels() + - value >= 0 + ensures + - #get_loss(label) == value + !*/ + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_SVM_SEQUENCE_LaBELING_PROBLEM_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/structural_track_association_trainer.h b/ml/dlib/dlib/svm/structural_track_association_trainer.h new file mode 100644 index 000000000..87fb829b2 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_track_association_trainer.h @@ -0,0 +1,404 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_Hh_ +#define DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_Hh_ + +#include "structural_track_association_trainer_abstract.h" +#include "../algs.h" +#include "svm.h" +#include <utility> +#include "track_association_function.h" +#include "structural_assignment_trainer.h" +#include <map> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace impl + { + template < + typename detection_type, + typename label_type + > + std::vector<detection_type> get_unlabeled_dets ( + const std::vector<labeled_detection<detection_type,label_type> >& dets + ) + { + std::vector<detection_type> temp; + temp.reserve(dets.size()); + for (unsigned long i = 0; i < dets.size(); ++i) + temp.push_back(dets[i].det); + return temp; + } + + } + +// ---------------------------------------------------------------------------------------- + + class structural_track_association_trainer + { + public: + + structural_track_association_trainer ( + ) + { + set_defaults(); + } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + void set_epsilon ( + double eps_ + ) + { + // make sure requires clause is not broken + 
DLIB_ASSERT(eps_ > 0, + "\t void structural_track_association_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + double get_epsilon ( + ) const { return eps; } + + void set_max_cache_size ( + unsigned long max_size + ) + { + max_cache_size = max_size; + } + + unsigned long get_max_cache_size ( + ) const + { + return max_cache_size; + } + + void set_loss_per_false_association ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_track_association_trainer::set_loss_per_false_association(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_false_association = loss; + } + + double get_loss_per_false_association ( + ) const + { + return loss_per_false_association; + } + + void set_loss_per_track_break ( + double loss + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(loss > 0, + "\t void structural_track_association_trainer::set_loss_per_track_break(loss)" + << "\n\t Invalid inputs were given to this function " + << "\n\t loss: " << loss + << "\n\t this: " << this + ); + + loss_per_track_break = loss; + } + + double get_loss_per_track_break ( + ) const + { + return loss_per_track_break; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + void set_c ( + double C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void structural_track_association_trainer::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + double get_c ( + ) const + { + return C; + } + + bool learns_nonnegative_weights ( + ) const { return learn_nonnegative_weights; } + + 
void set_learns_nonnegative_weights ( + bool value + ) + { + learn_nonnegative_weights = value; + } + + template < + typename detection_type, + typename label_type + > + const track_association_function<detection_type> train ( + const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_track_association_problem(samples), + "\t track_association_function structural_track_association_trainer::train()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_track_association_problem(samples): " << is_track_association_problem(samples) + ); + + typedef typename detection_type::track_type track_type; + + const unsigned long num_dims = find_num_dims(samples); + + feature_extractor_track_association<detection_type> fe(num_dims, learn_nonnegative_weights?num_dims:0); + structural_assignment_trainer<feature_extractor_track_association<detection_type> > trainer(fe); + + + if (verbose) + trainer.be_verbose(); + + trainer.set_c(C); + trainer.set_epsilon(eps); + trainer.set_max_cache_size(max_cache_size); + trainer.set_num_threads(num_threads); + trainer.set_oca(solver); + trainer.set_loss_per_missed_association(loss_per_track_break); + trainer.set_loss_per_false_association(loss_per_false_association); + + std::vector<std::pair<std::vector<detection_type>, std::vector<track_type> > > assignment_samples; + std::vector<std::vector<long> > labels; + for (unsigned long i = 0; i < samples.size(); ++i) + convert_dets_to_association_sets(samples[i], assignment_samples, labels); + + + return track_association_function<detection_type>(trainer.train(assignment_samples, labels)); + } + + template < + typename detection_type, + typename label_type + > + const track_association_function<detection_type> train ( + const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& sample + ) const + { + 
std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > > samples; + samples.push_back(sample); + return train(samples); + } + + private: + + template < + typename detection_type, + typename label_type + > + static unsigned long find_num_dims ( + const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples + ) + { + typedef typename detection_type::track_type track_type; + // find a detection_type object so we can call get_similarity_features() and + // find out how big the feature vectors are. + + // for all detection histories + for (unsigned long i = 0; i < samples.size(); ++i) + { + // for all time instances in the detection history + for (unsigned j = 0; j < samples[i].size(); ++j) + { + if (samples[i][j].size() > 0) + { + track_type new_track; + new_track.update_track(samples[i][j][0].det); + typename track_type::feature_vector_type feats; + new_track.get_similarity_features(samples[i][j][0].det, feats); + return feats.size(); + } + } + } + + DLIB_CASSERT(false, + "No detection objects were given in the call to dlib::structural_track_association_trainer::train()"); + } + + template < + typename detections_at_single_time_step, + typename detection_type, + typename track_type + > + static void convert_dets_to_association_sets ( + const std::vector<detections_at_single_time_step>& det_history, + std::vector<std::pair<std::vector<detection_type>, std::vector<track_type> > >& data, + std::vector<std::vector<long> >& labels + ) + { + if (det_history.size() < 1) + return; + + typedef typename detections_at_single_time_step::value_type::label_type label_type; + std::vector<track_type> tracks; + // track_labels maps from detection labels to the index in tracks. So track + // with detection label X is at tracks[track_labels[X]]. 
+ std::map<label_type,unsigned long> track_labels; + add_dets_to_tracks(tracks, track_labels, det_history[0]); + + using namespace impl; + for (unsigned long i = 1; i < det_history.size(); ++i) + { + data.push_back(std::make_pair(get_unlabeled_dets(det_history[i]), tracks)); + labels.push_back(get_association_labels(det_history[i], track_labels)); + add_dets_to_tracks(tracks, track_labels, det_history[i]); + } + } + + template < + typename labeled_detection, + typename label_type + > + static std::vector<long> get_association_labels( + const std::vector<labeled_detection>& dets, + const std::map<label_type,unsigned long>& track_labels + ) + { + std::vector<long> assoc(dets.size(),-1); + // find out which detections associate to what tracks + for (unsigned long i = 0; i < dets.size(); ++i) + { + typename std::map<label_type,unsigned long>::const_iterator j; + j = track_labels.find(dets[i].label); + // If this detection matches one of the tracks then record which track it + // matched with. + if (j != track_labels.end()) + assoc[i] = j->second; + } + return assoc; + } + + template < + typename track_type, + typename label_type, + typename labeled_detection + > + static void add_dets_to_tracks ( + std::vector<track_type>& tracks, + std::map<label_type,unsigned long>& track_labels, + const std::vector<labeled_detection>& dets + ) + { + std::vector<bool> updated_track(tracks.size(), false); + + // first assign the dets to the tracks + for (unsigned long i = 0; i < dets.size(); ++i) + { + const label_type& label = dets[i].label; + if (track_labels.count(label)) + { + const unsigned long track_idx = track_labels[label]; + tracks[track_idx].update_track(dets[i].det); + updated_track[track_idx] = true; + } + else + { + // this detection creates a new track + track_type new_track; + new_track.update_track(dets[i].det); + tracks.push_back(new_track); + track_labels[label] = tracks.size()-1; + } + + } + + // Now propagate all the tracks that didn't get any detections. 
+ for (unsigned long i = 0; i < updated_track.size(); ++i) + { + if (!updated_track[i]) + tracks[i].propagate_track(); + } + } + + double C; + oca solver; + double eps; + bool verbose; + unsigned long num_threads; + unsigned long max_cache_size; + bool learn_nonnegative_weights; + double loss_per_track_break; + double loss_per_false_association; + + void set_defaults () + { + C = 100; + verbose = false; + eps = 0.001; + num_threads = 2; + max_cache_size = 5; + learn_nonnegative_weights = false; + loss_per_track_break = 1; + loss_per_false_association = 1; + } + }; + +} + +#endif // DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_Hh_ + diff --git a/ml/dlib/dlib/svm/structural_track_association_trainer_abstract.h b/ml/dlib/dlib/svm/structural_track_association_trainer_abstract.h new file mode 100644 index 000000000..e78fadef7 --- /dev/null +++ b/ml/dlib/dlib/svm/structural_track_association_trainer_abstract.h @@ -0,0 +1,268 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_ABSTRACT_Hh_ +#ifdef DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_ABSTRACT_Hh_ + +#include "track_association_function_abstract.h" +#include "structural_assignment_trainer_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class structural_track_association_trainer + { + /*! + WHAT THIS OBJECT REPRESENTS + This object is a tool for learning to solve a track association problem. That + is, it takes in a set of training data and outputs a track_association_function + you can use to do detection to track association. The training data takes the + form of a set or sets of "track histories". Each track history is a + std::vector where each element contains all the detections from a single time + step. Moreover, each detection has a label that uniquely identifies which + object (e.g. 
person or whatever) the detection really corresponds to. That is, + the labels indicate the correct detection to track associations. The goal of + this object is then to produce a track_association_function that can perform a + correct detection to track association at each time step. + !*/ + + public: + + structural_track_association_trainer ( + ); + /*! + ensures + - #get_c() == 100 + - this object isn't verbose + - #get_epsilon() == 0.001 + - #get_num_threads() == 2 + - #get_max_cache_size() == 5 + - #learns_nonnegative_weights() == false + - #get_loss_per_track_break() == 1 + - #get_loss_per_false_association() == 1 + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + void set_epsilon ( + double eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + double get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer to + train. You can think of this epsilon value as saying "solve the + optimization problem until the average number of association mistakes per + time step is within epsilon of its optimal value". + !*/ + + void set_max_cache_size ( + unsigned long max_size + ); + /*! + ensures + - #get_max_cache_size() == max_size + !*/ + + unsigned long get_max_cache_size ( + ) const; + /*! + ensures + - During training, this object basically runs the track_association_function on + each training sample, over and over. To speed this up, it is possible to + cache the results of these invocations. This function returns the number + of cache elements per training sample kept in the cache. 
Note that a value + of 0 means caching is not used at all. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a user can + observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_loss_per_false_association ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_false_association() == loss + !*/ + + double get_loss_per_false_association ( + ) const; + /*! + ensures + - returns the amount of loss experienced for assigning a detection to the + wrong track. If you care more about avoiding false associations than + avoiding track breaks then you can increase this value. + !*/ + + void set_loss_per_track_break ( + double loss + ); + /*! + requires + - loss > 0 + ensures + - #get_loss_per_track_break() == loss + !*/ + + double get_loss_per_track_break ( + ) const; + /*! + ensures + - returns the amount of loss experienced for incorrectly assigning a + detection to a new track instead of assigning it to its existing track. + If you care more about avoiding track breaks than avoiding things like + track swaps then you can increase this value. + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - Internally this object treats track association learning as a structural + SVM problem. This routine returns a copy of the optimizer used to solve + the structural SVM problem. + !*/ + + void set_c ( + double C + ); + /*! + ensures + - returns the SVM regularization parameter. It is the parameter that + determines the trade-off between trying to fit the training data (i.e. + minimize the loss) or allowing more errors but hopefully improving the + generalization of the resulting track_association_function. 
Larger + values encourage exact fitting while smaller values of C may encourage + better generalization. + !*/ + + double get_c ( + ) const; + /*! + requires + - C > 0 + ensures + - #get_c() = C + !*/ + + bool learns_nonnegative_weights ( + ) const; + /*! + ensures + - Ultimately, the output of training is a parameter vector that defines the + behavior of the track_association_function. If + learns_nonnegative_weights() == true then the resulting learned parameter + vector will always have non-negative entries. + !*/ + + void set_learns_nonnegative_weights ( + bool value + ); + /*! + ensures + - #learns_nonnegative_weights() == value + !*/ + + template < + typename detection_type, + typename label_type + > + const track_association_function<detection_type> train ( + const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& sample + ) const; + /*! + requires + - is_track_association_problem(sample) == true + ensures + - This function attempts to learn to do track association from the given + training data. Note that we interpret sample as a single track history such + that sample[0] are all detections from the first time step, then sample[1] + are detections from the second time step, and so on. + - returns a function F such that: + - Executing F(tracks, detections) will try to correctly associate the + contents of detections to the contents of tracks and perform track + updating and creation. + - if (learns_nonnegative_weights() == true) then + - min(F.get_assignment_function().get_weights()) >= 0 + !*/ + + template < + typename detection_type, + typename label_type + > + const track_association_function<detection_type> train ( + const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& sample + ) const; + /*! + requires + - is_track_association_problem(samples) == true + ensures + - This function attempts to learn to do track association from the given + training data. 
In this case, we take a set of track histories as + training data instead of just one track history as with the above train() + method. + - returns a function F such that: + - Executing F(tracks, detections) will try to correctly associate the + contents of detections to the contents of tracks and perform track + updating and creation. + - if (learns_nonnegative_weights() == true) then + - min(F.get_assignment_function().get_weights()) >= 0 + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_STRUCTURAL_TRACK_ASSOCIATION_TRAnER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/svm.h b/ml/dlib/dlib/svm/svm.h new file mode 100644 index 000000000..e0587ef4a --- /dev/null +++ b/ml/dlib/dlib/svm/svm.h @@ -0,0 +1,1205 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVm_ +#define DLIB_SVm_ + +#include "svm_abstract.h" +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix.h" +#include "../algs.h" +#include "../serialize.h" +#include "../rand.h" +#include "../std_allocator.h" +#include "function.h" +#include "kernel.h" +#include "../enable_if.h" +#include "../optimization.h" +#include "svm_nu_trainer.h" +#include <vector> +#include <set> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + inline bool is_learning_problem_impl ( + const T& x, + const U& x_labels + ) + { + return is_col_vector(x) && + is_col_vector(x_labels) && + x.size() == x_labels.size() && + x.size() > 0; + } + + template < + typename T, + typename U + > + inline bool is_learning_problem ( + const T& x, + const U& x_labels + ) + { + return is_learning_problem_impl(mat(x), mat(x_labels)); + } + +// ---------------------------------------------------------------------------------------- + + template < + 
typename T, + typename U + > + bool is_binary_classification_problem_impl ( + const T& x, + const U& x_labels + ) + { + bool seen_neg_class = false; + bool seen_pos_class = false; + + if (is_learning_problem_impl(x,x_labels) == false) + return false; + + if (x.size() <= 1) return false; + + for (long r = 0; r < x_labels.nr(); ++r) + { + if (x_labels(r) != -1 && x_labels(r) != 1) + return false; + + if (x_labels(r) == 1) + seen_pos_class = true; + if (x_labels(r) == -1) + seen_neg_class = true; + } + + return seen_pos_class && seen_neg_class; + } + + template < + typename T, + typename U + > + bool is_binary_classification_problem ( + const T& x, + const U& x_labels + ) + { + return is_binary_classification_problem_impl(mat(x), mat(x_labels)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename dec_funct_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double,1,2> test_binary_decision_function_impl ( + const dec_funct_type& dec_funct, + const in_sample_vector_type& x_test, + const in_scalar_vector_type& y_test + ) + { + + // make sure requires clause is not broken + DLIB_ASSERT( is_binary_classification_problem(x_test,y_test) == true, + "\tmatrix test_binary_decision_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t is_binary_classification_problem(x_test,y_test): " + << ((is_binary_classification_problem(x_test,y_test))? 
"true":"false")); + + + // count the number of positive and negative examples + long num_pos = 0; + long num_neg = 0; + + + long num_pos_correct = 0; + long num_neg_correct = 0; + + + // now test this trained object + for (long i = 0; i < x_test.nr(); ++i) + { + // if this is a positive example + if (y_test(i) == +1.0) + { + ++num_pos; + if (dec_funct(x_test(i)) >= 0) + ++num_pos_correct; + } + else if (y_test(i) == -1.0) + { + ++num_neg; + if (dec_funct(x_test(i)) < 0) + ++num_neg_correct; + } + else + { + throw dlib::error("invalid input labels to the test_binary_decision_function() function"); + } + } + + + matrix<double, 1, 2> res; + res(0) = (double)num_pos_correct/(double)(num_pos); + res(1) = (double)num_neg_correct/(double)(num_neg); + return res; + } + + template < + typename dec_funct_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double,1,2> test_binary_decision_function ( + const dec_funct_type& dec_funct, + const in_sample_vector_type& x_test, + const in_scalar_vector_type& y_test + ) + { + return test_binary_decision_function_impl(dec_funct, + mat(x_test), + mat(y_test)); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename sequence_type + > + bool is_sequence_labeling_problem ( + const std::vector<sequence_type>& samples, + const std::vector<std::vector<unsigned long> >& labels + ) + { + if (is_learning_problem(samples, labels)) + { + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (samples[i].size() != labels[i].size()) + return false; + } + return true; + } + + return false; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename sequence_type + > + bool is_sequence_segmentation_problem ( + const std::vector<sequence_type>& samples, + const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments + ) + { + if 
(is_learning_problem(samples, segments)) + { + for (unsigned long i = 0; i < samples.size(); ++i) + { + // Make sure the segments are inside samples[i] and don't overlap with each + // other. + std::vector<bool> hits(samples[i].size(), false); + for (unsigned long j = 0; j < segments[i].size(); ++j) + { + const unsigned long begin = segments[i][j].first; + const unsigned long end = segments[i][j].second; + // if the segment is outside the sequence + if (end > samples[i].size()) + return false; + + if (begin >= end) + return false; + + // check for overlap + for (unsigned long k = begin; k < end; ++k) + { + if (hits[k]) + return false; + hits[k] = true; + } + } + } + return true; + } + + return false; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename lhs_type, + typename rhs_type + > + bool is_assignment_problem ( + const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, + const std::vector<std::vector<long> >& labels + ) + { + std::vector<bool> seen_label; + + if (is_learning_problem(samples, labels)) + { + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (samples[i].first.size() != labels[i].size()) + return false; + + seen_label.assign(samples[i].second.size(), false); + + for (unsigned long j = 0; j < labels[i].size(); ++j) + { + if (!(-1 <= labels[i][j] && labels[i][j] < (long)samples[i].second.size())) + return false; + + if (labels[i][j] != -1) + { + // check label uniqueness + if (seen_label[labels[i][j]]) + return false; + + seen_label[labels[i][j]] = true; + } + } + } + return true; + } + + return false; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename lhs_type, + typename rhs_type + > + bool is_forced_assignment_problem ( + const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, + const std::vector<std::vector<long> >& labels + ) + { + 
if (is_assignment_problem(samples, labels)) + { + for (unsigned long i = 0; i < samples.size(); ++i) + { + const unsigned long N = sum(mat(labels[i]) != -1); + if (std::min(samples[i].first.size(), samples[i].second.size()) != N) + return false; + } + return true; + } + + return false; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type_, + typename label_type_ = long + > + struct labeled_detection + { + typedef detection_type_ detection_type; + typedef label_type_ label_type; + detection_type det; + label_type label; + }; + + template < + typename detection_type_, + typename label_type_ + > + inline void serialize ( const labeled_detection<detection_type_,label_type_>& item, std::ostream& out) + { + serialize(item.det, out); + serialize(item.label, out); + } + + template < + typename detection_type_, + typename label_type_ + > + inline void deserialize (labeled_detection<detection_type_,label_type_>& item, std::istream& in) + { + deserialize(item.det, in); + deserialize(item.label, in); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type, + typename label_type + > + bool is_track_association_problem ( + const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& samples + ) + { + if (samples.size() == 0) + return false; + + unsigned long num_nonzero_elements = 0; + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (samples.size() > 0) + ++num_nonzero_elements; + } + if (num_nonzero_elements < 2) + return false; + + // now make sure the label_type values are unique within each time step. 
+ for (unsigned long i = 0; i < samples.size(); ++i) + { + std::set<label_type> vals; + for (unsigned long j = 0; j < samples[i].size(); ++j) + vals.insert(samples[i][j].label); + if (vals.size() != samples[i].size()) + return false; + } + + // passed all tests so it's good + return true; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type, + typename label_type + > + bool is_track_association_problem ( + const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples + ) + { + for (unsigned long i = 0; i < samples.size(); ++i) + { + if (!is_track_association_problem(samples[i])) + return false; + } + + // passed all tests so it's good + return true; + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double, 1, 2, typename trainer_type::mem_manager_type> + cross_validate_trainer_impl ( + const trainer_type& trainer, + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + const long folds + ) + { + typedef typename in_scalar_vector_type::value_type scalar_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y) == true && + 1 < folds && folds <= std::min(sum(y>0),sum(y<0)), + "\tmatrix cross_validate_trainer()" + << "\n\t invalid inputs were given to this function" + << "\n\t std::min(sum(y>0),sum(y<0)): " << std::min(sum(y>0),sum(y<0)) + << "\n\t folds: " << folds + << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))? 
"true":"false") + ); + + + // count the number of positive and negative examples + long num_pos = 0; + long num_neg = 0; + for (long r = 0; r < y.nr(); ++r) + { + if (y(r) == +1.0) + ++num_pos; + else + ++num_neg; + } + + // figure out how many positive and negative examples we will have in each fold + const long num_pos_test_samples = num_pos/folds; + const long num_pos_train_samples = num_pos - num_pos_test_samples; + const long num_neg_test_samples = num_neg/folds; + const long num_neg_train_samples = num_neg - num_neg_test_samples; + + + matrix<long,0,1> x_test, x_train; + scalar_vector_type y_test, y_train; + x_test.set_size (num_pos_test_samples + num_neg_test_samples); + y_test.set_size (num_pos_test_samples + num_neg_test_samples); + x_train.set_size(num_pos_train_samples + num_neg_train_samples); + y_train.set_size(num_pos_train_samples + num_neg_train_samples); + + long pos_idx = 0; + long neg_idx = 0; + + matrix<double, 1, 2, mem_manager_type> res; + set_all_elements(res,0); + + for (long i = 0; i < folds; ++i) + { + long cur = 0; + + // load up our positive test samples + while (cur < num_pos_test_samples) + { + if (y(pos_idx) == +1.0) + { + x_test(cur) = pos_idx; + y_test(cur) = +1.0; + ++cur; + } + pos_idx = (pos_idx+1)%x.nr(); + } + + // load up our negative test samples + while (cur < x_test.nr()) + { + if (y(neg_idx) == -1.0) + { + x_test(cur) = neg_idx; + y_test(cur) = -1.0; + ++cur; + } + neg_idx = (neg_idx+1)%x.nr(); + } + + // load the training data from the data following whatever we loaded + // as the testing data + long train_pos_idx = pos_idx; + long train_neg_idx = neg_idx; + cur = 0; + + // load up our positive train samples + while (cur < num_pos_train_samples) + { + if (y(train_pos_idx) == +1.0) + { + x_train(cur) = train_pos_idx; + y_train(cur) = +1.0; + ++cur; + } + train_pos_idx = (train_pos_idx+1)%x.nr(); + } + + // load up our negative train samples + while (cur < x_train.nr()) + { + if (y(train_neg_idx) == -1.0) + { + x_train(cur) 
= train_neg_idx; + y_train(cur) = -1.0; + ++cur; + } + train_neg_idx = (train_neg_idx+1)%x.nr(); + } + + try + { + // do the training and testing + res += test_binary_decision_function(trainer.train(rowm(x,x_train),y_train),rowm(x,x_test),y_test); + } + catch (invalid_nu_error&) + { + // Just ignore the error in this case since we are going to + // interpret an invalid nu value the same as generating a decision + // function that miss-classifies everything. + } + + } // for (long i = 0; i < folds; ++i) + + return res/(double)folds; + } + + template < + typename trainer_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double, 1, 2, typename trainer_type::mem_manager_type> + cross_validate_trainer ( + const trainer_type& trainer, + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + const long folds + ) + { + return cross_validate_trainer_impl(trainer, + mat(x), + mat(y), + folds); + } + +// ---------------------------------------------------------------------------------------- + + namespace prob_impl + { + template <typename vect_type> + struct objective + { + objective ( + const vect_type& f_, + const vect_type& t_ + ) : f(f_), t(t_) {} + + double operator() ( + const matrix<double,2,1>& x + ) const + { + const double A = x(0); + const double B = x(1); + + double res = 0; + for (unsigned long i = 0; i < f.size(); ++i) + { + const double val = A*f[i]+B; + // See the paper "A Note on Platt's Probabilistic Outputs for Support Vector Machines" + // for an explanation of why this code looks the way it does (rather than being the + // obvious formula). 
+ if (val < 0) + res += (t[i] - 1)*val + std::log(1 + std::exp(val)); + else + res += t[i]*val + std::log(1 + std::exp(-val)); + } + + return res; + } + + const vect_type& f; + const vect_type& t; + }; + + template <typename vect_type> + struct der + { + der ( + const vect_type& f_, + const vect_type& t_ + ) : f(f_), t(t_) {} + + matrix<double,2,1> operator() ( + const matrix<double,2,1>& x + ) const + { + const double A = x(0); + const double B = x(1); + + double derA = 0; + double derB = 0; + + for (unsigned long i = 0; i < f.size(); ++i) + { + const double val = A*f[i]+B; + double p; + // compute p = 1/(1+exp(val)) + // but do so in a way that avoids numerical overflow. + if (val < 0) + p = 1.0/(1 + std::exp(val)); + else + p = std::exp(-val)/(1 + std::exp(-val)); + + derA += f[i]*(t[i] - p); + derB += (t[i] - p); + } + + matrix<double,2,1> res; + res = derA, derB; + return res; + } + + const vect_type& f; + const vect_type& t; + }; + + template <typename vect_type> + struct hessian + { + hessian ( + const vect_type& f_, + const vect_type& t_ + ) : f(f_), t(t_) {} + + matrix<double,2,2> operator() ( + const matrix<double,2,1>& x + ) const + { + const double A = x(0); + const double B = x(1); + + matrix<double,2,2> h; + h = 0; + + for (unsigned long i = 0; i < f.size(); ++i) + { + const double val = A*f[i]+B; + // compute pp = 1/(1+exp(val)) and + // compute pn = 1 - pp + // but do so in a way that avoids numerical overflow and catastrophic cancellation. 
+ double pp, pn; + if (val < 0) + { + const double temp = std::exp(val); + pp = 1.0/(1 + temp); + pn = temp*pp; + } + else + { + const double temp = std::exp(-val); + pn = 1.0/(1 + temp); + pp = temp*pn; + } + + h(0,0) += f[i]*f[i]*pp*pn; + const double temp2 = f[i]*pp*pn; + h(0,1) += temp2; + h(1,0) += temp2; + h(1,1) += pp*pn; + } + + return h; + } + + const vect_type& f; + const vect_type& t; + }; + } + +// ---------------------------------------------------------------------------------------- + + inline double platt_scale ( + const std::pair<double,double>& params, + const double score + ) + { + return 1/(1 + std::exp(params.first*score + params.second)); + } + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename alloc> + std::pair<double,double> learn_platt_scaling ( + const std::vector<T,alloc>& scores, + const std::vector<T,alloc>& labels + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(scores,labels) == true, + "\t std::pair<T,T> learn_platt_scaling()" + << "\n\t invalid inputs were given to this function" + << "\n\t scores.size(): " << scores.size() + << "\n\t labels.size(): " << labels.size() + << "\n\t is_binary_classification_problem(scores,labels): " << is_binary_classification_problem(scores,labels) + ); + + const T num_pos = sum(mat(labels)>0); + const T num_neg = sum(mat(labels)<0); + const T hi_target = (num_pos+1)/(num_pos+2); + const T lo_target = 1.0/(num_neg+2); + + std::vector<T,alloc> target; + for (unsigned long i = 0; i < labels.size(); ++i) + { + // if this was a positive example + if (labels[i] == +1.0) + { + target.push_back(hi_target); + } + else if (labels[i] == -1.0) + { + target.push_back(lo_target); + } + else + { + throw dlib::error("invalid input labels to the learn_platt_scaling() function."); + } + } + + // Now find the maximum likelihood parameters of the sigmoid. 
+ + prob_impl::objective<std::vector<T,alloc> > obj(scores, target); + prob_impl::der<std::vector<T,alloc> > obj_der(scores, target); + prob_impl::hessian<std::vector<T,alloc> > obj_hessian(scores, target); + + matrix<double,2,1> val; + val = 0; + find_min(newton_search_strategy(obj_hessian), + objective_delta_stop_strategy(), + obj, + obj_der, + val, + 0); + + const double A = val(0); + const double B = val(1); + + return std::make_pair(A,B); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename sample_vector_type, + typename label_vector_type + > + const probabilistic_function<typename trainer_type::trained_function_type> + train_probabilistic_decision_function ( + const trainer_type& trainer, + const sample_vector_type& x, + const label_vector_type& y, + const long folds + ) + { + typedef typename sample_vector_type::value_type sample_type; + typedef typename label_vector_type::value_type scalar_type; + + /* + This function fits a sigmoid function to the output of the + svm trained by svm_nu_trainer or a similar trainer. The + technique used is the one described in the papers: + + Probabilistic Outputs for Support Vector Machines and + Comparisons to Regularized Likelihood Methods by + John C. Platt. March 26, 1999 + + A Note on Platt's Probabilistic Outputs for Support Vector Machines + by Hsuan-Tien Lin, Chih-Jen Lin, and Ruby C. 
Weng + */ + + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y) == true && + 1 < folds && folds <= (long)x.size(), + "\tprobabilistic_decision_function train_probabilistic_decision_function()" + << "\n\t invalid inputs were given to this function" + << "\n\t x.size(): " << x.size() + << "\n\t y.size(): " << y.size() + << "\n\t folds: " << folds + << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) + ); + + // count the number of positive and negative examples + const long num_pos = (long)sum(mat(y) > 0); + const long num_neg = (long)sum(mat(y) < 0); + + // figure out how many positive and negative examples we will have in each fold + const long num_pos_test_samples = num_pos/folds; + const long num_pos_train_samples = num_pos - num_pos_test_samples; + const long num_neg_test_samples = num_neg/folds; + const long num_neg_train_samples = num_neg - num_neg_test_samples; + + typename trainer_type::trained_function_type d; + std::vector<sample_type> x_test, x_train; + std::vector<scalar_type> y_test, y_train; + x_test.resize (num_pos_test_samples + num_neg_test_samples); + y_test.resize (num_pos_test_samples + num_neg_test_samples); + x_train.resize(num_pos_train_samples + num_neg_train_samples); + y_train.resize(num_pos_train_samples + num_neg_train_samples); + + std::vector<scalar_type> out, out_label; + + long pos_idx = 0; + long neg_idx = 0; + + for (long i = 0; i < folds; ++i) + { + long cur = 0; + + // load up our positive test samples + while (cur < num_pos_test_samples) + { + if (y[pos_idx] == +1.0) + { + x_test[cur] = x[pos_idx]; + y_test[cur] = +1.0; + ++cur; + } + pos_idx = (pos_idx+1)%x.size(); + } + + // load up our negative test samples + while (cur < (long)x_test.size()) + { + if (y[neg_idx] == -1.0) + { + x_test[cur] = x[neg_idx]; + y_test[cur] = -1.0; + ++cur; + } + neg_idx = (neg_idx+1)%x.size(); + } + + // load the training data from the data following whatever we 
loaded + // as the testing data + long train_pos_idx = pos_idx; + long train_neg_idx = neg_idx; + cur = 0; + + // load up our positive train samples + while (cur < num_pos_train_samples) + { + if (y[train_pos_idx] == +1.0) + { + x_train[cur] = x[train_pos_idx]; + y_train[cur] = +1.0; + ++cur; + } + train_pos_idx = (train_pos_idx+1)%x.size(); + } + + // load up our negative train samples + while (cur < (long)x_train.size()) + { + if (y[train_neg_idx] == -1.0) + { + x_train[cur] = x[train_neg_idx]; + y_train[cur] = -1.0; + ++cur; + } + train_neg_idx = (train_neg_idx+1)%x.size(); + } + + // do the training + d = trainer.train (x_train,y_train); + + // now test this fold + for (unsigned long i = 0; i < x_test.size(); ++i) + { + out.push_back(d(x_test[i])); + out_label.push_back(y_test[i]); + } + + } // for (long i = 0; i < folds; ++i) + + std::pair<double,double> params = learn_platt_scaling(out, out_label); + + const double A = params.first; + const double B = params.second; + + return probabilistic_function<typename trainer_type::trained_function_type>( A, B, trainer.train(x,y) ); + } + +// ---------------------------------------------------------------------------------------- + + template <typename trainer_type> + struct trainer_adapter_probabilistic + { + typedef probabilistic_function<typename trainer_type::trained_function_type> trained_function_type; + + const trainer_type trainer; + const long folds; + + trainer_adapter_probabilistic ( + const trainer_type& trainer_, + const long folds_ + ) : trainer(trainer_),folds(folds_) {} + + template < + typename T, + typename U + > + const trained_function_type train ( + const T& samples, + const U& labels + ) const + { + return train_probabilistic_decision_function(trainer, samples, labels, folds); + } + + }; + + template < + typename trainer_type + > + trainer_adapter_probabilistic<trainer_type> probabilistic ( + const trainer_type& trainer, + const long folds + ) + { + return 
trainer_adapter_probabilistic<trainer_type>(trainer,folds); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename V, + typename rand_type + > + typename enable_if<is_matrix<T>,void>::type randomize_samples ( + T& t, + U& u, + V& v, + rand_type& r + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(t) && is_vector(u) && is_vector(v) && u.size() == t.size() && + u.size() == v.size(), + "\t randomize_samples(t,u,v)" + << "\n\t invalid inputs were given to this function" + << "\n\t t.size(): " << t.size() + << "\n\t u.size(): " << u.size() + << "\n\t v.size(): " << v.size() + << "\n\t is_vector(t): " << is_vector(t) + << "\n\t is_vector(u): " << is_vector(u) + << "\n\t is_vector(v): " << is_vector(v) + ); + + long n = t.size()-1; + while (n > 0) + { + // pick a random index to swap into t[n] + const unsigned long idx = r.get_random_32bit_number()%(n+1); + + // swap our randomly selected index into the n position + exchange(t(idx), t(n)); + exchange(u(idx), u(n)); + exchange(v(idx), v(n)); + + --n; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename V, + typename rand_type + > + typename disable_if<is_matrix<T>,void>::type randomize_samples ( + T& t, + U& u, + V& v, + rand_type& r + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(u.size() == t.size() && u.size() == v.size(), + "\t randomize_samples(t,u,v)" + << "\n\t invalid inputs were given to this function" + << "\n\t t.size(): " << t.size() + << "\n\t u.size(): " << u.size() + << "\n\t v.size(): " << v.size() + ); + + long n = t.size()-1; + while (n > 0) + { + // pick a random index to swap into t[n] + const unsigned long idx = r.get_random_32bit_number()%(n+1); + + // swap our 
randomly selected index into the n position + exchange(t[idx], t[n]); + exchange(u[idx], u[n]); + exchange(v[idx], v[n]); + + --n; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename V + > + typename disable_if<is_rand<V>,void>::type randomize_samples ( + T& t, + U& u, + V& v + ) + { + rand r; + randomize_samples(t,u,v,r); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename rand_type + > + typename enable_if_c<is_matrix<T>::value && is_rand<rand_type>::value,void>::type randomize_samples ( + T& t, + U& u, + rand_type& r + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(t) && is_vector(u) && u.size() == t.size(), + "\t randomize_samples(t,u)" + << "\n\t invalid inputs were given to this function" + << "\n\t t.size(): " << t.size() + << "\n\t u.size(): " << u.size() + << "\n\t is_vector(t): " << (is_vector(t)? "true" : "false") + << "\n\t is_vector(u): " << (is_vector(u)? 
"true" : "false") + ); + + long n = t.size()-1; + while (n > 0) + { + // pick a random index to swap into t[n] + const unsigned long idx = r.get_random_32bit_number()%(n+1); + + // swap our randomly selected index into the n position + exchange(t(idx), t(n)); + exchange(u(idx), u(n)); + + --n; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename rand_type + > + typename disable_if_c<is_matrix<T>::value || !is_rand<rand_type>::value,void>::type randomize_samples ( + T& t, + U& u, + rand_type& r + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(u.size() == t.size(), + "\t randomize_samples(t,u)" + << "\n\t invalid inputs were given to this function" + << "\n\t t.size(): " << t.size() + << "\n\t u.size(): " << u.size() + ); + + long n = t.size()-1; + while (n > 0) + { + // pick a random index to swap into t[n] + const unsigned long idx = r.get_random_32bit_number()%(n+1); + + // swap our randomly selected index into the n position + exchange(t[idx], t[n]); + exchange(u[idx], u[n]); + + --n; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + typename disable_if<is_rand<U>,void>::type randomize_samples ( + T& t, + U& u + ) + { + rand r; + randomize_samples(t,u,r); + } + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename rand_type + > + typename enable_if_c<is_matrix<T>::value && is_rand<rand_type>::value,void>::type randomize_samples ( + T& t, + rand_type& r + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(is_vector(t), + "\t randomize_samples(t)" + << "\n\t invalid inputs were given to this function" + << "\n\t is_vector(t): " << (is_vector(t)? 
"true" : "false") + ); + + long n = t.size()-1; + while (n > 0) + { + // pick a random index to swap into t[n] + const unsigned long idx = r.get_random_32bit_number()%(n+1); + + // swap our randomly selected index into the n position + exchange(t(idx), t(n)); + + --n; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename rand_type + > + typename disable_if_c<(is_matrix<T>::value==true)||(is_rand<rand_type>::value==false),void>::type randomize_samples ( + T& t, + rand_type& r + ) + { + long n = t.size()-1; + while (n > 0) + { + // pick a random index to swap into t[n] + const unsigned long idx = r.get_random_32bit_number()%(n+1); + + // swap our randomly selected index into the n position + exchange(t[idx], t[n]); + + --n; + } + } + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void randomize_samples ( + T& t + ) + { + rand r; + randomize_samples(t,r); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_ + diff --git a/ml/dlib/dlib/svm/svm_abstract.h b/ml/dlib/dlib/svm/svm_abstract.h new file mode 100644 index 000000000..ec92cf55b --- /dev/null +++ b/ml/dlib/dlib/svm/svm_abstract.h @@ -0,0 +1,604 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_SVm_ABSTRACT_ +#ifdef DLIB_SVm_ABSTRACT_ + +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "svm_nu_trainer_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + bool is_learning_problem ( + const T& x, + const U& x_labels + ); + /*! + requires + - T == a matrix or something convertible to a matrix via mat() + - U == a matrix or something convertible to a matrix via mat() + ensures + - returns true if all of the following are true and false otherwise: + - is_col_vector(x) == true + - is_col_vector(x_labels) == true + - x.size() == x_labels.size() + - x.size() > 0 + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + bool is_binary_classification_problem ( + const T& x, + const U& x_labels + ); + /*! + requires + - T == a matrix or something convertible to a matrix via mat() + - U == a matrix or something convertible to a matrix via mat() + ensures + - returns true if all of the following are true and false otherwise: + - is_learning_problem(x, x_labels) == true + - x.size() > 1 + - there exists at least one sample from both the +1 and -1 classes. + (i.e. 
all samples can't have the same label) + - for all valid i: + - x_labels(i) == -1 or +1 + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sequence_type + > + bool is_sequence_labeling_problem ( + const std::vector<sequence_type>& samples, + const std::vector<std::vector<unsigned long> >& labels + ); + /*! + ensures + - returns true if all of the following are true and false otherwise: + - is_learning_problem(samples, labels) == true + - for all valid i: + - samples[i].size() == labels[i].size() + (i.e. The size of a label sequence need to match the size of + its corresponding sample sequence) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename sequence_type + > + bool is_sequence_segmentation_problem ( + const std::vector<sequence_type>& samples, + const std::vector<std::vector<std::pair<unsigned long,unsigned long> > >& segments + ); + /*! + ensures + - Note that a sequence segmentation problem is a task where you are given a + sequence of objects (e.g. words in a sentence) and your task is to find + certain types of sub-sequences (e.g. proper names). + - returns true if all of the following are true and false otherwise: + - is_learning_problem(samples, segments) == true + - for all valid i and j: + - We interpret segments[i][j] as defining a half open range starting + with segments[i][j].first and ending just before segments[i][j].second. + - segments[i][j].first < segments[i][j].second + - segments[i][j].second <= samples[i].size() + (i.e. Each segment must be contained within its associated sequence) + - segments[i][j] does not overlap with any of the other ranges in + segments[i]. 
+ !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename lhs_type, + typename rhs_type + > + bool is_assignment_problem ( + const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, + const std::vector<std::vector<long> >& labels + ); + /*! + ensures + - Note that an assignment problem is a task to associate each element of samples[i].first + to an element of samples[i].second, or to indicate that the element doesn't associate + with anything. Therefore, labels[i] should contain the association information for + samples[i]. + - This function returns true if all of the following are true and false otherwise: + - is_learning_problem(samples, labels) == true + - for all valid i: + - samples[i].first.size() == labels[i].size() + - for all valid j: + -1 <= labels[i][j] < samples[i].second.size() + (A value of -1 indicates that samples[i].first[j] isn't associated with anything. + All other values indicate the associating element of samples[i].second) + - All elements of labels[i] which are not equal to -1 are unique. That is, + multiple elements of samples[i].first can't associate to the same element + in samples[i].second. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename lhs_type, + typename rhs_type + > + bool is_forced_assignment_problem ( + const std::vector<std::pair<std::vector<lhs_type>, std::vector<rhs_type> > >& samples, + const std::vector<std::vector<long> >& labels + ); + /*! + ensures + - A regular assignment problem is allowed to indicate that all elements of + samples[i].first don't associate to anything. However, a forced assignment + problem is required to always associate an element of samples[i].first to + something in samples[i].second if there is an element of samples[i].second + that hasn't already been associated to something. 
+ - This function returns true if all of the following are true and false otherwise: + - is_assignment_problem(samples, labels) == true + - for all valid i: + - let N denote the number of elements in labels[i] that are not equal to -1. + - min(samples[i].first.size(), samples[i].second.size()) == N + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type_, + typename label_type_ = long + > + struct labeled_detection + { + /*! + WHAT THIS OBJECT REPRESENTS + This is a simple object, like std::pair, it just holds two objects. It + serves the same purpose as std::pair except that it has informative names + describing its two members and is intended for use with track association + problems. + !*/ + + typedef detection_type_ detection_type; + typedef label_type_ label_type; + + detection_type det; + label_type label; + }; + + template < + typename detection_type_, + typename label_type_ + > + void serialize (const labeled_detection<detection_type_,label_type_>& item, std::ostream& out); + /*! + provides serialization support + !*/ + + template < + typename detection_type_, + typename label_type_ + > + void deserialize (labeled_detection<detection_type_,label_type_>& item, std::istream& in); + /*! + provides deserialization support + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type, + typename label_type + > + bool is_track_association_problem ( + const std::vector<std::vector<labeled_detection<detection_type,label_type> > >& samples + ); + /*! + ensures + - In this tracking model you get a set of detections at each time step and are + expected to associate each detection with a track or have it spawn a new + track. 
Therefore, a track association problem is a machine learning problem + where you are given a dataset of example input detections and are expected to + learn to perform the proper detection to track association. + - This function checks if samples can form a valid dataset for this machine + learning problem and returns true if this is the case. This means we should + interpret samples in the following way: + - samples is a track history and for each valid i: + - samples[i] is a set of labeled detections from the i-th time step. + Each detection has been labeled with its "true object identity". + That is, all the detection throughout the history with the same + label_type value are detections from the same object and therefore + should be associated to the same track. + Putting this all together, samples is a valid track association learning + problem if and only if the following are all true: + - samples.size() > 0 + - There are at least two values, i and j such that: + - i != j + - samples[i].size() > 0 + - samples[j].size() > 0 + Or in other words, there needs to be some detections in samples somewhere + or it is impossible to learn anything. + - for all valid i: + - for all valid j and k where j!=k: + - samples[i][j].label != samples[i][k].label + (i.e. the label_type values must be unique within each time step. + Or in other words, you can't have two detections on the same + object in a single time step.) + !*/ + + template < + typename detection_type, + typename label_type + > + bool is_track_association_problem ( + const std::vector<std::vector<std::vector<labeled_detection<detection_type,label_type> > > >& samples + ); + /*! + ensures + - returns true if is_track_association_problem(samples[i]) == true for all + valid i and false otherwise. 
+ !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + double platt_scale ( + const std::pair<double,double>& params, + const double score + ); + /*! + ensures + - returns 1/(1 + std::exp(params.first*score + params.second)) + !*/ + +// ---------------------------------------------------------------------------------------- + + template <typename T, typename alloc> + std::pair<double,double> learn_platt_scaling ( + const std::vector<T,alloc>& scores, + const std::vector<T,alloc>& labels + ); + /*! + requires + - T should be either float, double, or long double + - is_binary_classification_problem(scores,labels) == true + ensures + - This function learns to map scalar values into well calibrated probabilities + using Platt scaling. In particular, it returns a params object such that, + for all valid i: + - platt_scale(params,scores[i]) == the scaled version of the scalar value + scores[i]. That is, the output is a number between 0 and 1. In + particular, platt_scale(params,scores[i]) is meant to represent the + probability that labels[i] == +1. + - This function is an implementation of the algorithm described in the following + papers: + Probabilistic Outputs for Support Vector Machines and Comparisons to + Regularized Likelihood Methods by John C. Platt. March 26, 1999 + + A Note on Platt's Probabilistic Outputs for Support Vector Machines + by Hsuan-Tien Lin, Chih-Jen Lin, and Ruby C. 
Weng + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename sample_vector_type, + typename label_vector_type + > + const probabilistic_function<typename trainer_type::trained_function_type> + train_probabilistic_decision_function ( + const trainer_type& trainer, + const sample_vector_type& x, + const label_vector_type& y, + const long folds + ); + /*! + requires + - 1 < folds <= x.size() + - is_binary_classification_problem(x,y) == true + - x and y must be std::vector objects or types with a compatible interface. + - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) + ensures + - trains a classifier given the training samples in x and labels in y. + - returns a probabilistic_decision_function that represents the trained classifier. + - The parameters of the probability model are estimated by performing k-fold + cross validation. + - The number of folds used is given by the folds argument. + - This function is implemented using learn_platt_scaling() + throws + - any exceptions thrown by trainer.train() + - std::bad_alloc + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type + > + trainer_adapter_probabilistic<trainer_type> probabilistic ( + const trainer_type& trainer, + const long folds + ); + /*! + requires + - 1 < folds <= x.size() + - trainer_type == some kind of batch trainer object (e.g. svm_nu_trainer) + ensures + - returns a trainer adapter TA such that calling TA.train(samples, labels) + returns the same object as calling train_probabilistic_decision_function(trainer,samples,labels,folds). 
+ !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// Miscellaneous functions +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename trainer_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double,1,2> cross_validate_trainer ( + const trainer_type& trainer, + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + const long folds + ); + /*! + requires + - is_binary_classification_problem(x,y) == true + - 1 < folds <= std::min(sum(y>0),sum(y<0)) + (e.g. There must be at least as many examples of each class as there are folds) + - trainer_type == some kind of binary classification trainer object (e.g. svm_nu_trainer) + ensures + - performs k-fold cross validation by using the given trainer to solve the + given binary classification problem for the given number of folds. + Each fold is tested using the output of the trainer and the average + classification accuracy from all folds is returned. + - The average accuracy is computed by running test_binary_decision_function() + on each fold and its output is averaged and returned. + - The number of folds used is given by the folds argument. + throws + - any exceptions thrown by trainer.train() + - std::bad_alloc + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename dec_funct_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double,1,2> test_binary_decision_function ( + const dec_funct_type& dec_funct, + const in_sample_vector_type& x_test, + const in_scalar_vector_type& y_test + ); + /*! 
+ requires + - is_binary_classification_problem(x_test,y_test) == true + - dec_funct_type == some kind of decision function object (e.g. decision_function) + ensures + - Tests the given decision function by calling it on the x_test and y_test samples. + The output of dec_funct is interpreted as a prediction for the +1 class + if its output is >= 0 and as a prediction for the -1 class otherwise. + - The test accuracy is returned in a row vector, let us call it R. Both + quantities in R are numbers between 0 and 1 which represent the fraction + of examples correctly classified. R(0) is the fraction of +1 examples + correctly classified and R(1) is the fraction of -1 examples correctly + classified. + throws + - std::bad_alloc + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U + > + void randomize_samples ( + T& samples, + U& labels + ); + /*! + requires + - T == a matrix object or an object compatible with std::vector that contains + a swappable type. + - U == a matrix object or an object compatible with std::vector that contains + a swappable type. + - if samples or labels are matrix objects then is_vector(samples) == true and + is_vector(labels) == true + - samples.size() == labels.size() + ensures + - randomizes the order of the samples and labels but preserves + the pairing between each sample and its label + - A default initialized random number generator is used to perform the randomizing. + Note that this means that each call to this function does the same thing. + That is, the random number generator always uses the same seed. + - for all valid i: + - let r == the random index samples(i) was moved to. 
then: + - #labels(r) == labels(i) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename rand_type + > + void randomize_samples ( + T& samples, + U& labels, + rand_type& rnd + ); + /*! + requires + - T == a matrix object or an object compatible with std::vector that contains + a swappable type. + - U == a matrix object or an object compatible with std::vector that contains + a swappable type. + - if samples or labels are matrix objects then is_vector(samples) == true and + is_vector(labels) == true + - samples.size() == labels.size() + - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface + ensures + - randomizes the order of the samples and labels but preserves + the pairing between each sample and its label + - the given rnd random number generator object is used to do the randomizing + - for all valid i: + - let r == the random index samples(i) was moved to. then: + - #labels(r) == labels(i) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T + > + void randomize_samples ( + T& samples + ); + /*! + requires + - T == a matrix object or an object compatible with std::vector that contains + a swappable type. + - if (samples is a matrix) then + - is_vector(samples) == true + ensures + - randomizes the order of the elements inside samples + - A default initialized random number generator is used to perform the randomizing. + Note that this means that each call to this function does the same thing. + That is, the random number generator always uses the same seed. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename rand_type + > + void randomize_samples ( + T& samples, + rand_type& rnd + ); + /*! 
+ requires + - T == a matrix object or an object compatible with std::vector that contains + a swappable type. + - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface + - if (samples is a matrix) then + - is_vector(samples) == true + ensures + - randomizes the order of the elements inside samples + - the given rnd random number generator object is used to do the randomizing + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename V + > + void randomize_samples ( + T& samples, + U& labels, + V& auxiliary + ); + /*! + requires + - T == a matrix object or an object compatible with std::vector that contains + a swappable type. + - U == a matrix object or an object compatible with std::vector that contains + a swappable type. + - V == a matrix object or an object compatible with std::vector that contains + a swappable type. + - if (samples, labels, or auxiliary are matrix objects) then + - is_vector(samples) == true + - is_vector(labels) == true + - is_vector(auxiliary) == true + - samples.size() == labels.size() == auxiliary.size() + ensures + - randomizes the order of the samples, labels, and auxiliary but preserves the + pairing between each sample, its label, and its auxiliary value. + - A default initialized random number generator is used to perform the + randomizing. Note that this means that each call to this function does the + same thing. That is, the random number generator always uses the same seed. + - for all valid i: + - let r == the random index samples(i) was moved to. then: + - #labels(r) == labels(i) + - #auxiliary(r) == auxiliary(i) + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename T, + typename U, + typename V, + typename rand_type + > + void randomize_samples ( + T& samples, + U& labels, + V& auxiliary, + rand_type& rnd + ); + /*! 
+ requires + - T == a matrix object or an object compatible with std::vector that contains + a swappable type. + - U == a matrix object or an object compatible with std::vector that contains + a swappable type. + - V == a matrix object or an object compatible with std::vector that contains + a swappable type. + - if (samples, labels, or auxiliary are matrix objects) then + - is_vector(samples) == true + - is_vector(labels) == true + - is_vector(auxiliary) == true + - samples.size() == labels.size() == auxiliary.size() + - rand_type == a type that implements the dlib/rand/rand_kernel_abstract.h interface + ensures + - randomizes the order of the samples, labels, and auxiliary but preserves the + pairing between each sample, its label, and its auxiliary value. + - the given rnd random number generator object is used to do the randomizing + - for all valid i: + - let r == the random index samples(i) was moved to. then: + - #labels(r) == labels(i) + - #auxiliary(r) == auxiliary(i) + !*/ + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_ABSTRACT_ + + diff --git a/ml/dlib/dlib/svm/svm_c_ekm_trainer.h b/ml/dlib/dlib/svm/svm_c_ekm_trainer.h new file mode 100644 index 000000000..735e0f22e --- /dev/null +++ b/ml/dlib/dlib/svm/svm_c_ekm_trainer.h @@ -0,0 +1,636 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SVM_C_EKm_TRAINER_Hh_ +#define DLIB_SVM_C_EKm_TRAINER_Hh_ + +#include "../algs.h" +#include "function.h" +#include "kernel.h" +#include "empirical_kernel_map.h" +#include "svm_c_linear_trainer.h" +#include "svm_c_ekm_trainer_abstract.h" +#include "../statistics.h" +#include "../rand.h" +#include <vector> + +namespace dlib +{ + template < + typename K + > + class svm_c_ekm_trainer + { + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_c_ekm_trainer ( + ) + { + verbose = false; + ekm_stale = true; + + initial_basis_size = 10; + basis_size_increment = 50; + max_basis_size = 300; + } + + explicit svm_c_ekm_trainer ( + const scalar_type& C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t svm_c_ekm_trainer::svm_c_ekm_trainer()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + + ocas.set_c(C); + verbose = false; + ekm_stale = true; + + initial_basis_size = 10; + basis_size_increment = 50; + max_basis_size = 300; + } + + void set_epsilon ( + scalar_type eps + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps > 0, + "\t void svm_c_ekm_trainer::set_epsilon()" + << "\n\t eps must be greater than 0" + << "\n\t eps: " << eps + << "\n\t this: " << this + ); + + ocas.set_epsilon(eps); + } + + const scalar_type get_epsilon ( + ) const + { + return ocas.get_epsilon(); + } + + void set_max_iterations ( + unsigned long max_iter + ) + { + ocas.set_max_iterations(max_iter); + } + + unsigned long get_max_iterations ( + ) + { + return ocas.get_max_iterations(); + } + + void be_verbose ( + ) + { + verbose = true; + ocas.be_quiet(); + } + + void be_very_verbose ( + ) + { + verbose = true; + ocas.be_verbose(); + } + + void be_quiet ( + ) + { + 
verbose = false; + ocas.be_quiet(); + } + + void set_oca ( + const oca& item + ) + { + ocas.set_oca(item); + } + + const oca get_oca ( + ) const + { + return ocas.get_oca(); + } + + const kernel_type get_kernel ( + ) const + { + return kern; + } + + void set_kernel ( + const kernel_type& k + ) + { + kern = k; + ekm_stale = true; + } + + template <typename T> + void set_basis ( + const T& basis_samples + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(basis_samples.size() > 0 && is_vector(mat(basis_samples)), + "\tvoid svm_c_ekm_trainer::set_basis(basis_samples)" + << "\n\t You have to give a non-empty set of basis_samples and it must be a vector" + << "\n\t basis_samples.size(): " << basis_samples.size() + << "\n\t is_vector(mat(basis_samples)): " << is_vector(mat(basis_samples)) + << "\n\t this: " << this + ); + + basis = mat(basis_samples); + ekm_stale = true; + } + + bool basis_loaded( + ) const + { + return (basis.size() != 0); + } + + void clear_basis ( + ) + { + basis.set_size(0); + ekm.clear(); + ekm_stale = true; + } + + unsigned long get_max_basis_size ( + ) const + { + return max_basis_size; + } + + void set_max_basis_size ( + unsigned long max_basis_size_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(max_basis_size_ > 0, + "\t void svm_c_ekm_trainer::set_max_basis_size()" + << "\n\t max_basis_size_ must be greater than 0" + << "\n\t max_basis_size_: " << max_basis_size_ + << "\n\t this: " << this + ); + + max_basis_size = max_basis_size_; + if (initial_basis_size > max_basis_size) + initial_basis_size = max_basis_size; + } + + unsigned long get_initial_basis_size ( + ) const + { + return initial_basis_size; + } + + void set_initial_basis_size ( + unsigned long initial_basis_size_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(initial_basis_size_ > 0, + "\t void svm_c_ekm_trainer::set_initial_basis_size()" + << "\n\t initial_basis_size_ must be greater than 0" + << "\n\t initial_basis_size_: " << 
initial_basis_size_ + << "\n\t this: " << this + ); + + initial_basis_size = initial_basis_size_; + + if (initial_basis_size > max_basis_size) + max_basis_size = initial_basis_size; + } + + unsigned long get_basis_size_increment ( + ) const + { + return basis_size_increment; + } + + void set_basis_size_increment ( + unsigned long basis_size_increment_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(basis_size_increment_ > 0, + "\t void svm_c_ekm_trainer::set_basis_size_increment()" + << "\n\t basis_size_increment_ must be greater than 0" + << "\n\t basis_size_increment_: " << basis_size_increment_ + << "\n\t this: " << this + ); + + basis_size_increment = basis_size_increment_; + } + + void set_c ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_ekm_trainer::set_c()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + ocas.set_c(C); + } + + const scalar_type get_c_class1 ( + ) const + { + return ocas.get_c_class1(); + } + + const scalar_type get_c_class2 ( + ) const + { + return ocas.get_c_class2(); + } + + void set_c_class1 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_ekm_trainer::set_c_class1()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + ocas.set_c_class1(C); + } + + void set_c_class2 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_ekm_trainer::set_c_class2()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + ocas.set_c_class2(C); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + scalar_type obj; + if (basis_loaded()) + return 
do_train_user_basis(mat(x),mat(y),obj); + else + return do_train_auto_basis(mat(x),mat(y),obj); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const + { + if (basis_loaded()) + return do_train_user_basis(mat(x),mat(y),svm_objective); + else + return do_train_auto_basis(mat(x),mat(y),svm_objective); + } + + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train_user_basis ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const + /*! + requires + - basis_loaded() == true + ensures + - trains an SVM with the user supplied basis + !*/ + { + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y) == true, + "\t decision_function svm_c_ekm_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) + ); + + if (ekm_stale) + { + ekm.load(kern, basis); + ekm_stale = false; + } + + // project all the samples with the ekm + running_stats<scalar_type> rs; + std::vector<matrix<scalar_type,0,1, mem_manager_type> > proj_samples; + proj_samples.reserve(x.size()); + for (long i = 0; i < x.size(); ++i) + { + if (verbose) + { + scalar_type err; + proj_samples.push_back(ekm.project(x(i), err)); + rs.add(err); + } + else + { + proj_samples.push_back(ekm.project(x(i))); + } + } + + if (verbose) + { + std::cout << "\nMean EKM projection error: " << rs.mean() << std::endl; + std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl; + } + + // now do the training 
+ decision_function<linear_kernel<matrix<scalar_type,0,1, mem_manager_type> > > df; + df = ocas.train(proj_samples, y, svm_objective); + + if (verbose) + { + std::cout << "Final svm objective: " << svm_objective << std::endl; + } + + decision_function<kernel_type> final_df; + final_df = ekm.convert_to_decision_function(df.basis_vectors(0)); + final_df.b = df.b; + return final_df; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train_auto_basis ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y) == true, + "\t decision_function svm_c_ekm_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) + ); + + + std::vector<matrix<scalar_type,0,1, mem_manager_type> > proj_samples(x.size()); + decision_function<linear_kernel<matrix<scalar_type,0,1, mem_manager_type> > > df; + + // we will use a linearly_independent_subset_finder to store our basis set. 
+ linearly_independent_subset_finder<kernel_type> lisf(get_kernel(), max_basis_size); + + dlib::rand rnd; + + // first pick the initial basis set randomly + for (unsigned long i = 0; i < 10*initial_basis_size && lisf.size() < initial_basis_size; ++i) + { + lisf.add(x(rnd.get_random_32bit_number()%x.size())); + } + + ekm.load(lisf); + + // first project all samples into the span of the current basis + for (long i = 0; i < x.size(); ++i) + { + proj_samples[i] = ekm.project(x(i)); + } + + + svm_c_linear_trainer<linear_kernel<matrix<scalar_type,0,1,mem_manager_type> > > trainer(ocas); + + const scalar_type min_epsilon = trainer.get_epsilon(); + // while we are determining what the basis set will be we are going to use a very + // lose stopping condition. We will tighten it back up before producing the + // final decision_function. + trainer.set_epsilon(0.2); + + scalar_type prev_svm_objective = std::numeric_limits<scalar_type>::max(); + + empirical_kernel_map<kernel_type> prev_ekm; + + // This loop is where we try to generate a basis for SVM training. We will + // do this by repeatedly training the SVM and adding a few points which violate the + // margin to the basis in each iteration. + while (true) + { + // if the basis is already as big as it's going to get then just do the most + // accurate training right now. + if (lisf.size() == max_basis_size) + trainer.set_epsilon(min_epsilon); + + while (true) + { + // now do the training. + df = trainer.train(proj_samples, y, svm_objective); + + if (svm_objective < prev_svm_objective) + break; + + // If the training didn't reduce the objective more than last time then + // try lowering the epsilon and doing it again. 
+ if (trainer.get_epsilon() > min_epsilon) + { + trainer.set_epsilon(std::max(trainer.get_epsilon()*0.5, min_epsilon)); + if (verbose) + std::cout << " *** Reducing epsilon to " << trainer.get_epsilon() << std::endl; + } + else + break; + } + + if (verbose) + { + std::cout << "svm objective: " << svm_objective << std::endl; + std::cout << "basis size: " << lisf.size() << std::endl; + } + + // if we failed to make progress on this iteration then we are done + if (svm_objective >= prev_svm_objective) + break; + + prev_svm_objective = svm_objective; + + // now add more elements to the basis + unsigned long count = 0; + for (unsigned long j = 0; + (j < 100*basis_size_increment) && (count < basis_size_increment) && (lisf.size() < max_basis_size); + ++j) + { + // pick a random sample + const unsigned long idx = rnd.get_random_32bit_number()%x.size(); + // If it is a margin violator then it is useful to add it into the basis set. + if (df(proj_samples[idx])*y(idx) < 1) + { + // Add the sample into the basis set if it is linearly independent of all the + // vectors already in the basis set. + if (lisf.add(x(idx))) + { + ++count; + } + } + } + // if we couldn't add any more basis vectors then stop + if (count == 0) + { + if (verbose) + std::cout << "Stopping, couldn't add more basis vectors." << std::endl; + break; + } + + + // Project all the samples into the span of our newly enlarged basis. We will do this + // using the special transformation in the EKM that lets us project from a smaller + // basis set to a larger without needing to reevaluate kernel functions we have already + // computed. 
+ ekm.swap(prev_ekm); + ekm.load(lisf); + projection_function<kernel_type> proj_part; + matrix<double> prev_to_new; + prev_ekm.get_transformation_to(ekm, prev_to_new, proj_part); + + + matrix<scalar_type,0,1, mem_manager_type> temp; + for (long i = 0; i < x.size(); ++i) + { + // assign to temporary to avoid memory allocation that would result if we + // assigned this expression straight into proj_samples[i] + temp = prev_to_new*proj_samples[i] + proj_part(x(i)); + proj_samples[i] = temp; + + } + } + + // Reproject all the data samples using the final basis. We could just use what we + // already have but the recursive thing done above to compute the proj_samples + // might have accumulated a little numerical error. So lets just be safe. + running_stats<scalar_type> rs, rs_margin; + for (long i = 0; i < x.size(); ++i) + { + if (verbose) + { + scalar_type err; + proj_samples[i] = ekm.project(x(i),err); + rs.add(err); + // if this point is within the margin + if (df(proj_samples[i])*y(i) < 1) + rs_margin.add(err); + } + else + { + proj_samples[i] = ekm.project(x(i)); + } + } + + // do the final training + trainer.set_epsilon(min_epsilon); + df = trainer.train(proj_samples, y, svm_objective); + + + if (verbose) + { + std::cout << "\nMean EKM projection error: " << rs.mean() << std::endl; + std::cout << "Standard deviation of EKM projection error: " << rs.stddev() << std::endl; + std::cout << "Mean EKM projection error for margin violators: " << rs_margin.mean() << std::endl; + std::cout << "Standard deviation of EKM projection error for margin violators: " << ((rs_margin.current_n()>1)?rs_margin.stddev():0) << std::endl; + + std::cout << "Final svm objective: " << svm_objective << std::endl; + } + + + decision_function<kernel_type> final_df; + final_df = ekm.convert_to_decision_function(df.basis_vectors(0)); + final_df.b = df.b; + + // we don't need the ekm anymore so clear it out + ekm.clear(); + + return final_df; + } + + + + + /*! 
+ CONVENTION + - if (ekm_stale) then + - kern or basis have changed since the last time + they were loaded into the ekm + !*/ + + svm_c_linear_trainer<linear_kernel<matrix<scalar_type,0,1,mem_manager_type> > > ocas; + bool verbose; + + kernel_type kern; + unsigned long max_basis_size; + unsigned long basis_size_increment; + unsigned long initial_basis_size; + + + matrix<sample_type,0,1,mem_manager_type> basis; + mutable empirical_kernel_map<kernel_type> ekm; + mutable bool ekm_stale; + + }; + +} + +#endif // DLIB_SVM_C_EKm_TRAINER_Hh_ + + + diff --git a/ml/dlib/dlib/svm/svm_c_ekm_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_ekm_trainer_abstract.h new file mode 100644 index 000000000..d1ba2bf5f --- /dev/null +++ b/ml/dlib/dlib/svm/svm_c_ekm_trainer_abstract.h @@ -0,0 +1,384 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVM_C_EKm_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_SVM_C_EKm_TRAINER_ABSTRACT_Hh_ + +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "empirical_kernel_map_abstract.h" +#include "svm_c_linear_trainer_abstract.h" + +namespace dlib +{ + template < + typename K + > + class svm_c_ekm_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object represents a tool for training the C formulation of + a support vector machine. It is implemented using the empirical_kernel_map + to kernelize the svm_c_linear_trainer. This makes it a very fast algorithm + capable of learning from very large datasets. + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_c_ekm_trainer ( + ); + /*! 
+ ensures + - This object is properly initialized and ready to be used + to train a support vector machine. + - #get_oca() == oca() (i.e. an instance of oca with default parameters) + - #get_c_class1() == 1 + - #get_c_class2() == 1 + - #get_epsilon() == 0.001 + - #basis_loaded() == false + - #get_initial_basis_size() == 10 + - #get_basis_size_increment() == 50 + - #get_max_basis_size() == 300 + - this object will not be verbose unless be_verbose() is called + - #get_max_iterations() == 10000 + !*/ + + explicit svm_c_ekm_trainer ( + const scalar_type& C + ); + /*! + requires + - C > 0 + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. + - #get_oca() == oca() (i.e. an instance of oca with default parameters) + - #get_c_class1() == C + - #get_c_class2() == C + - #get_epsilon() == 0.001 + - #basis_loaded() == false + - #get_initial_basis_size() == 10 + - #get_basis_size_increment() == 50 + - #get_max_basis_size() == 300 + - this object will not be verbose unless be_verbose() is called + - #get_max_iterations() == 10000 + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to execute. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_very_verbose ( + ); + /*! 
+ ensures + - This object will print a lot of status messages to standard out so that a + user can observe the progress of the algorithm. In addition to the + few status messages normal verbosity produces this setting also causes + the underlying svm_c_linear_trainer to be verbose. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the SVM problem. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + template <typename T> + void set_basis ( + const T& basis_samples + ); + /*! + requires + - T must be a dlib::matrix type or something convertible to a matrix via mat() + (e.g. a std::vector) + - is_vector(basis_samples) == true + - basis_samples.size() > 0 + - get_kernel() must be capable of operating on the elements of basis_samples. That is, + expressions such as get_kernel()(basis_samples(0), basis_samples(0)) should make sense. + ensures + - #basis_loaded() == true + - training will be carried out in the span of the given basis_samples + !*/ + + bool basis_loaded ( + ) const; + /*! + ensures + - returns true if this object has been loaded with user supplied basis vectors and false otherwise. + !*/ + + void clear_basis ( + ); + /*! + ensures + - #basis_loaded() == false + !*/ + + unsigned long get_max_basis_size ( + ) const; + /*! + ensures + - returns the maximum number of basis vectors this object is allowed + to use. This parameter only matters when the user has not supplied + a basis via set_basis(). + !*/ + + void set_max_basis_size ( + unsigned long max_basis_size + ); + /*! 
+ requires + - max_basis_size > 0 + ensures + - #get_max_basis_size() == max_basis_size + - if (get_initial_basis_size() > max_basis_size) then + - #get_initial_basis_size() == max_basis_size + !*/ + + unsigned long get_initial_basis_size ( + ) const; + /*! + ensures + - If the user does not supply a basis via set_basis() then this object + will generate one automatically. It does this by starting with + a small basis of size N and repeatedly adds basis vectors to it + until a stopping condition is reached. This function returns that + initial size N. + !*/ + + void set_initial_basis_size ( + unsigned long initial_basis_size + ); + /*! + requires + - initial_basis_size > 0 + ensures + - #get_initial_basis_size() == initial_basis_size + - if (initial_basis_size > get_max_basis_size()) then + - #get_max_basis_size() == initial_basis_size + !*/ + + unsigned long get_basis_size_increment ( + ) const; + /*! + ensures + - If the user does not supply a basis via set_basis() then this object + will generate one automatically. It does this by starting with a small + basis and repeatedly adds sets of N basis vectors to it until a stopping + condition is reached. This function returns that increment size N. + !*/ + + void set_basis_size_increment ( + unsigned long basis_size_increment + ); + /*! + requires + - basis_size_increment > 0 + ensures + - #get_basis_size_increment() == basis_size_increment + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + - #get_c_class2() == C + !*/ + + const scalar_type get_c_class1 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the +1 class. + It is the parameter that determines the trade off between + trying to fit the +1 training data exactly or allowing more errors + but hopefully improving the generalization ability of the + resulting classifier. Larger values encourage exact fitting + while smaller values of C may encourage better generalization. 
+ !*/ + + const scalar_type get_c_class2 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the -1 class. + It is the parameter that determines the trade off between + trying to fit the -1 training data exactly or allowing more errors + but hopefully improving the generalization ability of the + resulting classifier. Larger values encourage exact fitting + while smaller values of C may encourage better generalization. + !*/ + + void set_c_class1 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + !*/ + + void set_c_class2 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class2() == C + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - is_binary_classification_problem(x,y) == true + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + ensures + - trains a C support vector classifier given the training samples in x and + labels in y. + - if (basis_loaded()) then + - training will be carried out in the span of the user supplied basis vectors + - else + - this object will attempt to automatically select an appropriate basis + + - returns a decision function F with the following properties: + - if (new_x is a sample predicted have +1 label) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const; + /*! 
+                requires
+                    - is_binary_classification_problem(x,y) == true
+                    - x == a matrix or something convertible to a matrix via mat().
+                      Also, x should contain sample_type objects.
+                    - y == a matrix or something convertible to a matrix via mat().
+                      Also, y should contain scalar_type objects.
+                ensures
+                    - trains a C support vector classifier given the training samples in x and
+                      labels in y.
+                    - if (basis_loaded()) then
+                        - training will be carried out in the span of the user supplied basis vectors
+                    - else
+                        - this object will attempt to automatically select an appropriate basis
+
+                    - #svm_objective == the final value of the SVM objective function
+                    - returns a decision function F with the following properties:
+                        - if (new_x is a sample predicted to have a +1 label) then
+                            - F(new_x) >= 0
+                        - else
+                            - F(new_x) < 0
+        !*/
+
+    };
+
+}
+
+#endif // DLIB_SVM_C_EKm_TRAINER_ABSTRACT_Hh_
+
+
diff --git a/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer.h b/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer.h
new file mode 100644
index 000000000..039b70993
--- /dev/null
+++ b/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer.h
@@ -0,0 +1,712 @@
+// Copyright (C) 2012 Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_ +#define DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_ + +#include "svm_c_linear_dcd_trainer_abstract.h" +#include <cmath> +#include <limits> +#include "../matrix.h" +#include "../algs.h" +#include "../rand.h" +#include "svm.h" + +#include "function.h" +#include "kernel.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_c_linear_dcd_trainer + { + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + typedef typename decision_function<K>::sample_vector_type sample_vector_type; + typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; + + // You are getting a compiler error on this line because you supplied a non-linear + // kernel to the svm_c_linear_dcd_trainer object. You have to use one of the + // linear kernels with this trainer. 
+        COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
+                             is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
+
+        // Default construct with C = 1 for both classes and the default solver settings.
+        svm_c_linear_dcd_trainer (
+        ) :
+            Cpos(1),
+            Cneg(1),
+            eps(0.1),
+            max_iterations(10000),
+            verbose(false),
+            have_bias(true),
+            last_weight_1(false),
+            do_shrinking(true),
+            do_svm_l2(false)
+        {
+        }
+
+        // Construct with the given SVM regularization parameter C, applied to both classes.
+        explicit svm_c_linear_dcd_trainer (
+            const scalar_type& C_
+        ) :
+            Cpos(C_),
+            Cneg(C_),
+            eps(0.1),
+            max_iterations(10000),
+            verbose(false),
+            have_bias(true),
+            last_weight_1(false),
+            do_shrinking(true),
+            do_svm_l2(false)
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(0 < C_,
+                "\tsvm_c_linear_dcd_trainer::svm_c_linear_dcd_trainer(C)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t C_: " << C_
+                );
+        }
+
+        // True if the learned decision function will include a bias term b.
+        bool includes_bias (
+        ) const
+        {
+            return have_bias;
+        }
+
+        void include_bias (
+            bool should_have_bias
+        )
+        {
+            have_bias = should_have_bias;
+        }
+
+        // True if the last element of the learned weight vector is constrained to be 1.
+        bool forces_last_weight_to_1 (
+        ) const
+        {
+            return last_weight_1;
+        }
+
+        void force_last_weight_to_1 (
+            bool should_last_weight_be_1
+        )
+        {
+            last_weight_1 = should_last_weight_be_1;
+        }
+
+        // True if the active set shrinking heuristic is used during optimization.
+        bool shrinking_enabled (
+        ) const { return do_shrinking; }
+
+        void enable_shrinking (
+            bool enabled
+        ) { do_shrinking = enabled; }
+
+        // True if the squared (L2) hinge loss is optimized instead of the normal hinge loss.
+        bool solving_svm_l2_problem (
+        ) const { return do_svm_l2; }
+
+        void solve_svm_l2_problem (
+            bool enabled
+        ) { do_svm_l2 = enabled; }
+
+        void be_verbose (
+        )
+        {
+            verbose = true;
+        }
+
+        void be_quiet (
+        )
+        {
+            verbose = false;
+        }
+
+        // Set the duality gap tolerance that determines when the optimizer stops.
+        void set_epsilon (
+            scalar_type eps_
+        )
+        {
+            // make sure requires clause is not broken
+            DLIB_ASSERT(eps_ > 0,
+                "\tvoid svm_c_linear_dcd_trainer::set_epsilon(eps_)"
+                << "\n\t invalid inputs were given to this function"
+                << "\n\t eps_: " << eps_
+                );
+            eps = eps_;
+        }
+
+        const scalar_type get_epsilon (
+        ) const
+        {
+            return eps;
+        }
+
+        // Returns the kernel.  The linear kernels are stateless, so this must return by
+        // value: the previous version returned a const reference bound to the temporary
+        // kernel_type(), which is a dangling reference.
+        const kernel_type get_kernel (
+        ) const
+        {
+            return kernel_type();
+        }
+
+        unsigned long get_max_iterations (
+        ) 
const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void set_c ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_linear_dcd_trainer::set_c()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + Cneg = C; + } + + const scalar_type get_c_class1 ( + ) const + { + return Cpos; + } + + const scalar_type get_c_class2 ( + ) const + { + return Cneg; + } + + void set_c_class1 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_linear_dcd_trainer::set_c_class1()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + } + + void set_c_class2 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_linear_dcd_trainer::set_c_class2()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cneg = C; + } + + class optimizer_state + { + friend class svm_c_linear_dcd_trainer; + + public: + optimizer_state() : did_init(false) {} + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + void init( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + bool have_bias_, + bool last_weight_1_, + bool do_svm_l2_, + scalar_type Cpos, + scalar_type Cneg + ) + { + const long new_dims = max_index_plus_one(x); + long new_idx = 0; + + if (did_init) + { + DLIB_CASSERT(have_bias_ == have_bias && + last_weight_1_ == last_weight_1, + "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" + << "\n\t The given state object is invalid because the previous trainer was configured differently." 
+ << "\n\t have_bias_: " << have_bias_ + << "\n\t have_bias: " << have_bias + << "\n\t last_weight_1_: " << last_weight_1_ + << "\n\t last_weight_1: " << last_weight_1 + ); + + DLIB_CASSERT( new_dims >= dims, + "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" + << "\n\t The given state object is invalid because the training data dimensions have shrunk." + << "\n\t new_dims: " << new_dims + << "\n\t dims: " << dims + ); + + DLIB_CASSERT( x.size() >= static_cast<long>(alpha.size()), + "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" + << "\n\t The given state object is invalid because the training data has fewer samples than previously." + << "\n\t x.size(): " << x.size() + << "\n\t alpha.size(): " << alpha.size() + ); + + // make sure we amortize the cost of growing the alpha vector. + if (alpha.capacity() < static_cast<unsigned long>(x.size())) + alpha.reserve(x.size()*2); + + new_idx = alpha.size(); + + // Make sure alpha has the same length as x. So pad with extra zeros if + // necessary to make this happen. + alpha.resize(x.size(),0); + + + if (new_dims != dims) + { + // The only valid way the dimensions can be different here is if + // you are using a sparse vector type. This is because we might + // have had training samples which just happened to not include all + // the features previously. Therefore, max_index_plus_one() would + // have given too low of a result. But for dense vectors it is + // definitely a user error if the dimensions don't match. + + DLIB_CASSERT(is_matrix<sample_type>::value == false, + "\t decision_function svm_c_linear_dcd_trainer::train(x,y,state)" + << "\n\t The given state object is invalid because the training data dimensions have changed." + << "\n\t new_dims: " << new_dims + << "\n\t dims: " << dims + ); + + // extend w by the right number of elements + if (have_bias && !last_weight_1) + { + // Splice some zeros into the w vector so it will have the + // right length. 
Here we are being careful to move the bias + // weight to the end of the resulting vector. + w = join_cols(join_cols( + colm(w,0,dims), + zeros_matrix<scalar_type>(new_dims-dims,1)), + uniform_matrix<scalar_type>(1,1,w(dims)) + ); + } + else + { + // Just concatenate the right number of zeros. + w = join_cols(w, zeros_matrix<scalar_type>(new_dims-dims,1)); + } + dims = new_dims; + } + + } + else + { + did_init = true; + have_bias = have_bias_; + last_weight_1 = last_weight_1_; + dims = new_dims; + + alpha.resize(x.size()); + + index.reserve(x.size()); + Q.reserve(x.size()); + + if (have_bias && !last_weight_1) + w.set_size(dims+1); + else + w.set_size(dims); + + w = 0; + } + + for (long i = new_idx; i < x.size(); ++i) + { + Q.push_back(length_squared(x(i))); + + if (have_bias && !last_weight_1) + { + index.push_back(i); + Q.back() += 1; + } + else if (Q.back() != 0) + { + index.push_back(i); + } + + if (do_svm_l2_) + { + if (y(i) > 0) + Q.back() += 1/(2*Cpos); + else + Q.back() += 1/(2*Cneg); + } + } + + if (last_weight_1) + w(dims-1) = 1; + } + + template <typename T> + typename enable_if<is_matrix<T>,scalar_type>::type length_squared (const T& x) const + { + if (!last_weight_1) + { + return dlib::dot(x,x); + } + else + { + // skip the last dimension + return dlib::dot(colm(x,0,x.size()-1), + colm(x,0,x.size()-1)); + } + + } + + template <typename T> + typename disable_if<is_matrix<T>,scalar_type>::type length_squared (const T& x) const + { + if (!last_weight_1) + { + return dlib::dot(x,x); + } + else + { + scalar_type temp = 0; + typename T::const_iterator i; + for (i = x.begin(); i != x.end(); ++i) + { + // skip the last dimension + if (static_cast<long>(i->first) < dims-1) + temp += i->second*i->second; + } + return temp; + } + } + + + bool did_init; + bool have_bias; + bool last_weight_1; + std::vector<scalar_type> alpha; + scalar_vector_type w; + std::vector<scalar_type> Q; + std::vector<long> index; + long dims; + dlib::rand rnd; + + public: + + const 
std::vector<scalar_type>& get_alpha () const { return alpha; } + + friend void serialize(const optimizer_state& item, std::ostream& out) + { + const int version = 1; + dlib::serialize(version, out); + dlib::serialize(item.did_init, out); + dlib::serialize(item.have_bias, out); + dlib::serialize(item.last_weight_1, out); + dlib::serialize(item.alpha, out); + dlib::serialize(item.w, out); + dlib::serialize(item.Q, out); + dlib::serialize(item.index, out); + dlib::serialize(item.dims, out); + dlib::serialize(item.rnd, out); + } + + friend void deserialize(optimizer_state& item, std::istream& in) + { + int version = 0; + dlib::deserialize(version, in); + if (version != 1) + { + throw dlib::serialization_error( + "Error while deserializing dlib::svm_c_linear_dcd_trainer::optimizer_state, unexpected version." + ); + } + + dlib::deserialize(item.did_init, in); + dlib::deserialize(item.have_bias, in); + dlib::deserialize(item.last_weight_1, in); + dlib::deserialize(item.alpha, in); + dlib::deserialize(item.w, in); + dlib::deserialize(item.Q, in); + dlib::deserialize(item.index, in); + dlib::deserialize(item.dims, in); + dlib::deserialize(item.rnd, in); + } + + }; + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + optimizer_state state; + return do_train(mat(x), mat(y), state); + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + optimizer_state& state + ) const + { + return do_train(mat(x), mat(y), state); + } + + private: + + // ------------------------------------------------------------------------------------ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const 
in_sample_vector_type& x, + const in_scalar_vector_type& y, + optimizer_state& state + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y) == true, + "\t decision_function svm_c_linear_dcd_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.size(): " << x.size() + << "\n\t y.size(): " << y.size() + << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) + ); +#ifdef ENABLE_ASSERTS + for (long i = 0; i < x.size(); ++i) + { + DLIB_ASSERT(y(i) == +1 || y(i) == -1, + "\t decision_function svm_c_linear_dcd_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t y("<<i<<"): " << y(i) + ); + } +#endif + + state.init(x,y,have_bias,last_weight_1,do_svm_l2,Cpos,Cneg); + + std::vector<scalar_type>& alpha = state.alpha; + scalar_vector_type& w = state.w; + std::vector<long>& index = state.index; + const long dims = state.dims; + + + unsigned long active_size = index.size(); + + scalar_type PG_max_prev = std::numeric_limits<scalar_type>::infinity(); + scalar_type PG_min_prev = -std::numeric_limits<scalar_type>::infinity(); + + const scalar_type Dii_pos = 1/(2*Cpos); + const scalar_type Dii_neg = 1/(2*Cneg); + + // main loop + for (unsigned long iter = 0; iter < max_iterations; ++iter) + { + scalar_type PG_max = -std::numeric_limits<scalar_type>::infinity(); + scalar_type PG_min = std::numeric_limits<scalar_type>::infinity(); + + // randomly shuffle the indices + for (unsigned long i = 0; i < active_size; ++i) + { + // pick a random index >= i + const long j = i + state.rnd.get_random_32bit_number()%(active_size-i); + std::swap(index[i], index[j]); + } + + // for all the active training samples + for (unsigned long ii = 0; ii < active_size; ++ii) + { + const long i = index[ii]; + + scalar_type G = y(i)*dot(w, x(i)) - 1; + if (do_svm_l2) + { + if (y(i) > 0) + G += Dii_pos*alpha[i]; + else + G += Dii_neg*alpha[i]; + } + const scalar_type C = (y(i) > 0) ? 
Cpos : Cneg; + const scalar_type U = do_svm_l2 ? std::numeric_limits<scalar_type>::infinity() : C; + + scalar_type PG = 0; + if (alpha[i] == 0) + { + if (G > PG_max_prev) + { + // shrink the active set of training examples + --active_size; + std::swap(index[ii], index[active_size]); + --ii; + continue; + } + + if (G < 0) + PG = G; + } + else if (alpha[i] == U) + { + if (G < PG_min_prev) + { + // shrink the active set of training examples + --active_size; + std::swap(index[ii], index[active_size]); + --ii; + continue; + } + + if (G > 0) + PG = G; + } + else + { + PG = G; + } + + if (PG > PG_max) + PG_max = PG; + if (PG < PG_min) + PG_min = PG; + + // if PG != 0 + if (std::abs(PG) > 1e-12) + { + const scalar_type alpha_old = alpha[i]; + alpha[i] = std::min(std::max(alpha[i] - G/state.Q[i], (scalar_type)0.0), U); + const scalar_type delta = (alpha[i]-alpha_old)*y(i); + add_to(w, x(i), delta); + if (have_bias && !last_weight_1) + w(w.size()-1) -= delta; + + if (last_weight_1) + w(dims-1) = 1; + } + + } + + if (verbose) + { + using namespace std; + cout << "gap: " << PG_max - PG_min << endl; + cout << "active_size: " << active_size << endl; + cout << "iter: " << iter << endl; + cout << endl; + } + + if (PG_max - PG_min <= eps) + { + // stop if we are within eps tolerance and the last iteration + // was over all the samples + if (active_size == index.size()) + break; + + // Turn off shrinking on the next iteration. We will stop if the + // tolerance is still <= eps when shrinking is off. 
+ active_size = index.size(); + PG_max_prev = std::numeric_limits<scalar_type>::infinity(); + PG_min_prev = -std::numeric_limits<scalar_type>::infinity(); + } + else if (do_shrinking) + { + PG_max_prev = PG_max; + PG_min_prev = PG_min; + if (PG_max_prev <= 0) + PG_max_prev = std::numeric_limits<scalar_type>::infinity(); + if (PG_min_prev >= 0) + PG_min_prev = -std::numeric_limits<scalar_type>::infinity(); + } + + } // end of main optimization loop + + + + + // put the solution into a decision function and then return it + decision_function<kernel_type> df; + if (have_bias && !last_weight_1) + df.b = w(w.size()-1); + else + df.b = 0; + + df.basis_vectors.set_size(1); + // Copy the plane normal into the output basis vector. The output vector might + // be a sparse vector container so we need to use this special kind of copy to + // handle that case. + assign(df.basis_vectors(0), colm(w, 0, dims)); + df.alpha.set_size(1); + df.alpha(0) = 1; + + return df; + } + + scalar_type dot ( + const scalar_vector_type& w, + const sample_type& sample + ) const + { + if (have_bias && !last_weight_1) + { + const long w_size_m1 = w.size()-1; + return dlib::dot(colm(w,0,w_size_m1), sample) - w(w_size_m1); + } + else + { + return dlib::dot(w, sample); + } + } + + // ------------------------------------------------------------------------------------ + + scalar_type Cpos; + scalar_type Cneg; + scalar_type eps; + unsigned long max_iterations; + bool verbose; + bool have_bias; // having a bias means we pretend all x vectors have an extra element which is always -1. 
+ bool last_weight_1; + bool do_shrinking; + bool do_svm_l2; + + }; // end of class svm_c_linear_dcd_trainer + +// ---------------------------------------------------------------------------------------- + + +} + +#endif // DLIB_SVm_C_LINEAR_DCD_TRAINER_Hh_ + + diff --git a/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer_abstract.h new file mode 100644 index 000000000..b57c54260 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_c_linear_dcd_trainer_abstract.h @@ -0,0 +1,382 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_C_LINEAR_DCD_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_SVm_C_LINEAR_DCD_TRAINER_ABSTRACT_Hh_ + +#include "function_abstract.h" +#include "kernel_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_c_linear_dcd_trainer + { + /*! + REQUIREMENTS ON K + Is either linear_kernel or sparse_linear_kernel. + + WHAT THIS OBJECT REPRESENTS + This object represents a tool for training the C formulation of a support + vector machine. It is optimized for the case where linear kernels are + used. + + + In particular, it is implemented using the algorithm described in the + following paper: + A Dual Coordinate Descent Method for Large-scale Linear SVM + by Cho-Jui Hsieh, Kai-Wei Chang, and Chih-Jen Lin + + It solves the optimization problem of: + min_w: 0.5||w||^2 + C*sum_i (hinge loss for sample i) + where w is the learned SVM parameter vector. + + Note that this object is very similar to the svm_c_linear_trainer, however, + it interprets the C parameter slightly differently. In particular, C for + the DCD trainer is not automatically divided by the number of samples like + it is with the svm_c_linear_trainer. 
For example, a C value of 10 when + given to the svm_c_linear_trainer is equivalent to a C value of 10/N for + the svm_c_linear_dcd_trainer, where N is the number of training samples. + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + typedef typename decision_function<K>::sample_vector_type sample_vector_type; + typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; + + + svm_c_linear_dcd_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used to train a + support vector machine. + - #get_c_class1() == 1 + - #get_c_class2() == 1 + - #get_epsilon() == 0.1 + - #get_max_iterations() == 10000 + - This object will not be verbose unless be_verbose() is called + - #forces_last_weight_to_1() == false + - #includes_bias() == true + - #shrinking_enabled() == true + - #solving_svm_l2_problem() == false + !*/ + + explicit svm_c_linear_dcd_trainer ( + const scalar_type& C + ); + /*! + requires + - C > 0 + ensures + - This object is properly initialized and ready to be used to train a + support vector machine. + - #get_c_class1() == C + - #get_c_class2() == C + - #get_epsilon() == 0.1 + - #get_max_iterations() == 10000 + - This object will not be verbose unless be_verbose() is called + - #forces_last_weight_to_1() == false + - #includes_bias() == true + - #shrinking_enabled() == true + - #solving_svm_l2_problem() == false + !*/ + + bool includes_bias ( + ) const; + /*! + ensures + - returns true if this trainer will produce decision_functions with + non-zero bias values. + !*/ + + void include_bias ( + bool should_have_bias + ); + /*! + ensures + - #includes_bias() == should_have_bias + !*/ + + bool forces_last_weight_to_1 ( + ) const; + /*! 
+ ensures + - returns true if this trainer has the constraint that the last weight in + the learned parameter vector must be 1. This is the weight corresponding + to the feature in the training vectors with the highest dimension. + - Forcing the last weight to 1 also disables the bias and therefore the b + field of the learned decision_function will be 0 when forces_last_weight_to_1() == true. + This is true regardless of the setting of #include_bias(). + !*/ + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ); + /*! + ensures + - #forces_last_weight_to_1() == should_last_weight_be_1 + !*/ + + bool shrinking_enabled ( + ) const; + /*! + ensures + - returns true if the shrinking heuristic is enabled. Typically this makes + the algorithm run a lot faster so it should be enabled. + !*/ + + void enable_shrinking ( + bool enabled + ); + /*! + ensures + - #shrinking_enabled() == enabled + !*/ + + bool solving_svm_l2_problem ( + ) const; + /*! + ensures + - returns true if this solver will solve the L2 version of the SVM + objective function. That is, if solving_svm_l2_problem()==true then this + object, rather than using the hinge loss, uses the squared hinge loss. + !*/ + + void solve_svm_l2_problem ( + bool enabled + ); + /*! + ensures + - #solving_svm_l2_problem() == enabled + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_epsilon ( + scalar_type eps_ + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer to + train. + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! 
+ ensures + - returns a copy of the kernel function in use by this object. Since the + linear kernels don't have any parameters this function just returns + kernel_type() + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + - #get_c_class2() == C + !*/ + + const scalar_type get_c_class1 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the +1 class. It is the + parameter that determines the trade off between trying to fit the +1 + training data exactly or allowing more errors but hopefully improving the + generalization of the resulting classifier. Larger values encourage + exact fitting while smaller values of C may encourage better + generalization. + !*/ + + const scalar_type get_c_class2 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the -1 class. It is the + parameter that determines the trade off between trying to fit the -1 + training data exactly or allowing more errors but hopefully improving the + generalization of the resulting classifier. Larger values encourage + exact fitting while smaller values of C may encourage better + generalization. + !*/ + + void set_c_class1 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + !*/ + + void set_c_class2 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class2() == C + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! 
+ requires + - is_learning_problem(x,y) == true + (Note that it is ok for x.size() == 1) + - All elements of y must be equal to +1 or -1 + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + ensures + - Trains a C support vector classifier given the training samples in x and + labels in y. + - returns a decision function F with the following properties: + - F.alpha.size() == 1 + - F.basis_vectors.size() == 1 + - F.alpha(0) == 1 + - if (new_x is a sample predicted have +1 label) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + !*/ + + // optimizer_state is used to record the internal state of the SVM optimizer. It + // can be used with the following train() routine to warm-start the optimizer or + // access the optimal alpha values (see the Hsieh paper mentioned above). The + // optimizer_state objects are serializable and allow you to get the alphas, but + // are otherwise completely opaque to the user. + class optimizer_state + { + public: + const std::vector<scalar_type>& get_alpha ( + ) const; + }; + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + optimizer_state& state + ) const; + /*! 
+ requires + - is_learning_problem(x,y) == true + (Note that it is ok for x.size() == 1) + - All elements of y must be equal to +1 or -1 + - state must be either a default initialized optimizer_state object or all the + following conditions must be satisfied: + - Let LAST denote the previous trainer used with the state object, then + we must have: + - LAST.includes_bias() == includes_bias() + - LAST.forces_last_weight_to_1() == forces_last_weight_to_1() + - Let X denote the previous training samples used with state, then the + following must be satisfied: + - x.size() >= X.size() + - for all valid i: + - x(i) == X(i) + (i.e. the samples x and X have in common must be identical. + That is, the only allowed difference between x and X is that + x might have new training samples appended onto its end) + - if (x contains dense vectors) then + - max_index_plus_one(x) == max_index_plus_one(X) + - else + - max_index_plus_one(x) >= max_index_plus_one(X) + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + ensures + - Trains a C support vector classifier given the training samples in x and + labels in y. + - The point of the state object is to allow you to warm start the SVM + optimizer from the solution to a previous call to train(). Doing this + might make the training run faster. This is useful when you are trying + different C values or have grown the training set and want to retrain. + - #state == the internal state of the optimizer at the solution to the SVM + problem. Therefore, passing #state to a new call to train() will start + the optimizer from the current solution. + - #state.get_alpha().size() == x.size() + - #state.get_alpha() == the optimal alpha/dual values learned by the optimizer. 
+                - returns a decision function F with the following properties:
+                    - F.alpha.size() == 1
+                    - F.basis_vectors.size() == 1
+                    - F.alpha(0) == 1
+                    - if (new_x is a sample predicted to have a +1 label) then
+                        - F(new_x) >= 0
+                    - else
+                        - F(new_x) < 0
+        !*/
+    };
+
+// ----------------------------------------------------------------------------------------
+
+}
+
+#endif // DLIB_SVm_C_LINEAR_DCD_TRAINER_ABSTRACT_Hh_
+
diff --git a/ml/dlib/dlib/svm/svm_c_linear_trainer.h b/ml/dlib/dlib/svm/svm_c_linear_trainer.h
new file mode 100644
index 000000000..8d136d711
--- /dev/null
+++ b/ml/dlib/dlib/svm/svm_c_linear_trainer.h
@@ -0,0 +1,706 @@
+// Copyright (C) 2010  Davis E. King (davis@dlib.net)
+// License: Boost Software License   See LICENSE.txt for the full license.
+#ifndef DLIB_SVM_C_LiNEAR_TRAINER_Hh_
+#define DLIB_SVM_C_LiNEAR_TRAINER_Hh_
+
+#include "svm_c_linear_trainer_abstract.h"
+#include "../algs.h"
+#include "../optimization.h"
+#include "../matrix.h"
+#include "function.h"
+#include "kernel.h"
+#include <iostream>
+#include <vector>
+#include "sparse_vector.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename matrix_type,
+        typename in_sample_vector_type,
+        typename in_scalar_vector_type
+        >
+    class oca_problem_c_svm : public oca_problem<matrix_type >
+    {
+    public:
+        /*
+            This class is used as part of the implementation of the svm_c_linear_trainer
+            defined towards the end of this file.
+
+
+            The bias parameter is dealt with by imagining that each sample vector has -1
+            as its last element.
+ */ + + typedef typename matrix_type::type scalar_type; + + oca_problem_c_svm( + const scalar_type C_pos, + const scalar_type C_neg, + const in_sample_vector_type& samples_, + const in_scalar_vector_type& labels_, + const bool be_verbose_, + const scalar_type eps_, + const unsigned long max_iter, + const unsigned long dims_ + ) : + samples(samples_), + labels(labels_), + C(std::min(C_pos,C_neg)), + Cpos(C_pos/C), + Cneg(C_neg/C), + be_verbose(be_verbose_), + eps(eps_), + max_iterations(max_iter), + dims(dims_) + { + dot_prods.resize(samples.size()); + is_first_call = true; + } + + virtual scalar_type get_c ( + ) const + { + return C; + } + + virtual long get_num_dimensions ( + ) const + { + // plus 1 for the bias term + return dims + 1; + } + + virtual bool optimization_status ( + scalar_type current_objective_value, + scalar_type current_error_gap, + scalar_type current_risk_value, + scalar_type current_risk_gap, + unsigned long num_cutting_planes, + unsigned long num_iterations + ) const + { + if (be_verbose) + { + using namespace std; + cout << "objective: " << current_objective_value << endl; + cout << "objective gap: " << current_error_gap << endl; + cout << "risk: " << current_risk_value << endl; + cout << "risk gap: " << current_risk_gap << endl; + cout << "num planes: " << num_cutting_planes << endl; + cout << "iter: " << num_iterations << endl; + cout << endl; + } + + if (num_iterations >= max_iterations) + return true; + + if (current_risk_gap < eps) + return true; + + return false; + } + + virtual bool risk_has_lower_bound ( + scalar_type& lower_bound + ) const + { + lower_bound = 0; + return true; + } + + virtual void get_risk ( + matrix_type& w, + scalar_type& risk, + matrix_type& subgradient + ) const + { + line_search(w); + + subgradient.set_size(w.size(),1); + subgradient = 0; + risk = 0; + + + // loop over all the samples and compute the risk and its subgradient at the current solution point w + for (long i = 0; i < samples.size(); ++i) + { + // 
multiply current SVM output for the ith sample by its label + const scalar_type df_val = labels(i)*dot_prods[i]; + + if (labels(i) > 0) + risk += Cpos*std::max<scalar_type>(0.0,1 - df_val); + else + risk += Cneg*std::max<scalar_type>(0.0,1 - df_val); + + if (df_val < 1) + { + if (labels(i) > 0) + { + subtract_from(subgradient, samples(i), Cpos); + + subgradient(subgradient.size()-1) += Cpos; + } + else + { + add_to(subgradient, samples(i), Cneg); + + subgradient(subgradient.size()-1) -= Cneg; + } + } + } + + scalar_type scale = 1.0/samples.size(); + + risk *= scale; + subgradient = scale*subgradient; + } + + private: + + // ----------------------------------------------------- + // ----------------------------------------------------- + + void line_search ( + matrix_type& w + ) const + /*! + ensures + - does a line search to find a better w + - for all i: #dot_prods[i] == dot(colm(#w,0,w.size()-1), samples(i)) - #w(w.size()-1) + !*/ + { + // The reason for using w_size_m1 and not just w.size()-1 is because + // doing it this way avoids an inane warning from gcc that can occur in some cases. + const long w_size_m1 = w.size()-1; + for (long i = 0; i < samples.size(); ++i) + dot_prods[i] = dot(colm(w,0,w_size_m1), samples(i)) - w(w_size_m1); + + if (is_first_call) + { + is_first_call = false; + best_so_far = w; + dot_prods_best = dot_prods; + } + else + { + // do line search going from best_so_far to w. Store results in w. + // Here we use the line search algorithm presented in section 3.1.1 of Franc and Sonnenburg. + + const scalar_type A0 = length_squared(best_so_far - w); + const scalar_type BB0 = dot(best_so_far, w - best_so_far); + + const scalar_type scale_pos = (get_c()*Cpos)/samples.size(); + const scalar_type scale_neg = (get_c()*Cneg)/samples.size(); + + ks.clear(); + ks.reserve(samples.size()); + + scalar_type f0 = BB0; + for (long i = 0; i < samples.size(); ++i) + { + const scalar_type& scale = (labels(i)>0) ? 
scale_pos : scale_neg; + + const scalar_type B = scale*labels(i) * ( dot_prods_best[i] - dot_prods[i]); + const scalar_type C = scale*(1 - labels(i)* dot_prods_best[i]); + // Note that if B is 0 then it doesn't matter what k is set to. So 0 is fine. + scalar_type k = 0; + if (B != 0) + k = -C/B; + + if (k > 0) + ks.push_back(helper(k, std::abs(B))); + + if ( (B < 0 && k > 0) || (B > 0 && k <= 0) ) + f0 += B; + } + + scalar_type opt_k = 1; + // ks.size() == 0 shouldn't happen but check anyway + if (f0 >= 0 || ks.size() == 0) + { + // Getting here means that we aren't searching in a descent direction. + // We could take a zero step but instead lets just assign w to the new best + // so far point just to make sure we don't get stuck coming back to this + // case over and over. This might happen if we never move the best point + // seen so far. + + // So we let opt_k be 1 + } + else + { + std::sort(ks.begin(), ks.end()); + + // figure out where f0 goes positive. + for (unsigned long i = 0; i < ks.size(); ++i) + { + f0 += ks[i].B; + if (f0 + A0*ks[i].k >= 0) + { + opt_k = ks[i].k; + break; + } + } + + } + + // Don't let the step size get too big. Otherwise we might pick huge steps + // over and over that don't improve the cutting plane approximation. + if (opt_k > 1.0) + { + opt_k = 1.0; + } + + // take the step suggested by the line search + best_so_far = (1-opt_k)*best_so_far + opt_k*w; + + // update best_so_far dot products + for (unsigned long i = 0; i < dot_prods_best.size(); ++i) + dot_prods_best[i] = (1-opt_k)*dot_prods_best[i] + opt_k*dot_prods[i]; + + + const scalar_type mu = 0.1; + // Make sure we always take a little bit of a step towards w regardless of what the + // line search says to do. We do this since it is possible that some steps won't + // advance the best_so_far point. So this ensures we always make some progress each + // iteration. 
+ w = (1-mu)*best_so_far + mu*w; + + // update dot products + for (unsigned long i = 0; i < dot_prods.size(); ++i) + dot_prods[i] = (1-mu)*dot_prods_best[i] + mu*dot_prods[i]; + } + } + + struct helper + { + helper(scalar_type k_, scalar_type B_) : k(k_), B(B_) {} + scalar_type k; + scalar_type B; + + bool operator< (const helper& item) const { return k < item.k; } + }; + + mutable std::vector<helper> ks; + + mutable bool is_first_call; + mutable std::vector<scalar_type> dot_prods; + + mutable matrix_type best_so_far; // best w seen so far + mutable std::vector<scalar_type> dot_prods_best; // dot products between best_so_far and samples + + + const in_sample_vector_type& samples; + const in_scalar_vector_type& labels; + const scalar_type C; + const scalar_type Cpos; + const scalar_type Cneg; + + const bool be_verbose; + const scalar_type eps; + const unsigned long max_iterations; + const unsigned long dims; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type, + typename in_sample_vector_type, + typename in_scalar_vector_type, + typename scalar_type + > + oca_problem_c_svm<matrix_type, in_sample_vector_type, in_scalar_vector_type> make_oca_problem_c_svm ( + const scalar_type C_pos, + const scalar_type C_neg, + const in_sample_vector_type& samples, + const in_scalar_vector_type& labels, + const bool be_verbose, + const scalar_type eps, + const unsigned long max_iterations, + const unsigned long dims + ) + { + return oca_problem_c_svm<matrix_type, in_sample_vector_type, in_scalar_vector_type>( + C_pos, C_neg, samples, labels, be_verbose, eps, max_iterations, dims); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_c_linear_trainer + { + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef 
typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + // You are getting a compiler error on this line because you supplied a non-linear kernel + // to the svm_c_linear_trainer object. You have to use one of the linear kernels with this + // trainer. + COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || + is_same_type<K, sparse_linear_kernel<sample_type> >::value )); + + svm_c_linear_trainer ( + ) + { + Cpos = 1; + Cneg = 1; + verbose = false; + eps = 0.001; + max_iterations = 10000; + learn_nonnegative_weights = false; + last_weight_1 = false; + } + + explicit svm_c_linear_trainer ( + const scalar_type& C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t svm_c_linear_trainer::svm_c_linear_trainer()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + Cneg = C; + verbose = false; + eps = 0.001; + max_iterations = 10000; + learn_nonnegative_weights = false; + last_weight_1 = false; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void svm_c_linear_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const { return eps; } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + const kernel_type get_kernel ( + ) const + { + return kernel_type(); + } + + bool learns_nonnegative_weights ( + ) const { return learn_nonnegative_weights; } + + void 
set_learns_nonnegative_weights ( + bool value + ) + { + learn_nonnegative_weights = value; + if (learn_nonnegative_weights) + prior.set_size(0); + } + + bool forces_last_weight_to_1 ( + ) const + { + return last_weight_1; + } + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ) + { + last_weight_1 = should_last_weight_be_1; + if (last_weight_1) + prior.set_size(0); + } + + void set_prior ( + const trained_function_type& prior_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(prior_.basis_vectors.size() == 1 && + prior_.alpha(0) == 1, + "\t void svm_c_linear_trainer::set_prior()" + << "\n\t The supplied prior could not have been created by this object's train() method." + << "\n\t prior_.basis_vectors.size(): " << prior_.basis_vectors.size() + << "\n\t prior_.alpha(0): " << prior_.alpha(0) + << "\n\t this: " << this + ); + + prior = sparse_to_dense(prior_.basis_vectors(0)); + prior_b = prior_.b; + learn_nonnegative_weights = false; + last_weight_1 = false; + } + + bool has_prior ( + ) const + { + return prior.size() != 0; + } + + void set_c ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_linear_trainer::set_c()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + Cneg = C; + } + + const scalar_type get_c_class1 ( + ) const + { + return Cpos; + } + + const scalar_type get_c_class2 ( + ) const + { + return Cneg; + } + + void set_c_class1 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_linear_trainer::set_c_class1()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + } + + void set_c_class2 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_linear_trainer::set_c_class2()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << 
this + ); + + Cneg = C; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + scalar_type obj; + return do_train(mat(x),mat(y),obj); + } + + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const + { + return do_train(mat(x),mat(y),svm_objective); + } + + private: + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y) == true, + "\t decision_function svm_c_linear_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + << "\n\t is_learning_problem(x,y): " << is_learning_problem(x,y) + ); +#ifdef ENABLE_ASSERTS + for (long i = 0; i < x.size(); ++i) + { + DLIB_ASSERT(y(i) == +1 || y(i) == -1, + "\t decision_function svm_c_linear_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t y("<<i<<"): " << y(i) + ); + } +#endif + + + typedef matrix<scalar_type,0,1> w_type; + w_type w; + + const unsigned long num_dims = max_index_plus_one(x); + + unsigned long num_nonnegative = 0; + if (learn_nonnegative_weights) + { + num_nonnegative = num_dims; + } + + unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max(); + if (last_weight_1) + { + force_weight_1_idx = num_dims-1; + } + + + if (has_prior()) + { + if (is_matrix<sample_type>::value) + { + // make sure requires 
clause is not broken + DLIB_CASSERT(num_dims == (unsigned long)prior.size(), + "\t decision_function svm_c_linear_trainer::train(x,y)" + << "\n\t The dimension of the training vectors must match the dimension of\n" + << "\n\t those used to create the prior." + << "\n\t num_dims: " << num_dims + << "\n\t prior.size(): " << prior.size() + ); + } + const unsigned long dims = std::max(num_dims, (unsigned long)prior.size()); + // In the case of sparse sample vectors, it is possible that the input + // vector dimensionality is larger than the prior vector dimensionality. + // We need to check for this case and pad prior with zeros if it is the + // case. + matrix<scalar_type,0,1> prior_temp = join_cols(join_cols(prior, + zeros_matrix<scalar_type>(dims-prior.size(),1)), + mat(prior_b)); + + svm_objective = solver( + make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, dims), + w, + prior_temp); + } + else + { + svm_objective = solver( + make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, num_dims), + w, + num_nonnegative, + force_weight_1_idx); + } + + // put the solution into a decision function and then return it + decision_function<kernel_type> df; + df.b = static_cast<scalar_type>(w(w.size()-1)); + df.basis_vectors.set_size(1); + // Copy the plane normal into the output basis vector. The output vector might be a + // sparse vector container so we need to use this special kind of copy to handle that case. + // As an aside, the reason for using max_index_plus_one() and not just w.size()-1 is because + // doing it this way avoids an inane warning from gcc that can occur in some cases. 
+ const long out_size = max_index_plus_one(x); + assign(df.basis_vectors(0), matrix_cast<scalar_type>(colm(w, 0, out_size))); + df.alpha.set_size(1); + df.alpha(0) = 1; + + return df; + } + + scalar_type Cpos; + scalar_type Cneg; + oca solver; + scalar_type eps; + bool verbose; + unsigned long max_iterations; + bool learn_nonnegative_weights; + bool last_weight_1; + matrix<scalar_type,0,1> prior; + scalar_type prior_b = 0; + }; + +// ---------------------------------------------------------------------------------------- + +} + +// ---------------------------------------------------------------------------------------- + + +#endif // DLIB_SVM_C_LiNEAR_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/svm_c_linear_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_linear_trainer_abstract.h new file mode 100644 index 000000000..1b7a128f0 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_c_linear_trainer_abstract.h @@ -0,0 +1,359 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVM_C_LiNEAR_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_SVM_C_LiNEAR_TRAINER_ABSTRACT_Hh_ + +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "sparse_kernel_abstract.h" + +namespace dlib +{ + template < + typename K + > + class svm_c_linear_trainer + { + /*! + REQUIREMENTS ON K + Is either linear_kernel or sparse_linear_kernel. + + WHAT THIS OBJECT REPRESENTS + This object represents a tool for training the C formulation of + a support vector machine. It is optimized for the case where + linear kernels are used. + + + In particular, it is implemented using the OCAS algorithm + described in the following paper: + Optimized Cutting Plane Algorithm for Large-Scale Risk Minimization + Vojtech Franc, Soren Sonnenburg; Journal of Machine Learning + Research, 10(Oct):2157--2192, 2009. 
+        !*/
+
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        svm_c_linear_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to be used
+                  to train a support vector machine.
+                - #get_oca() == oca() (i.e. an instance of oca with default parameters)
+                - #get_c_class1() == 1
+                - #get_c_class2() == 1
+                - #get_epsilon() == 0.001
+                - this object will not be verbose unless be_verbose() is called
+                - #get_max_iterations() == 10000
+                - #learns_nonnegative_weights() == false
+                - #forces_last_weight_to_1() == false
+                - #has_prior() == false
+        !*/
+
+        explicit svm_c_linear_trainer (
+            const scalar_type& C
+        );
+        /*!
+            requires
+                - C > 0
+            ensures
+                - This object is properly initialized and ready to be used
+                  to train a support vector machine.
+                - #get_oca() == oca() (i.e. an instance of oca with default parameters)
+                - #get_c_class1() == C
+                - #get_c_class2() == C
+                - #get_epsilon() == 0.001
+                - this object will not be verbose unless be_verbose() is called
+                - #get_max_iterations() == 10000
+                - #learns_nonnegative_weights() == false
+                - #forces_last_weight_to_1() == false
+                - #has_prior() == false
+        !*/
+
+        void set_epsilon (
+            scalar_type eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_epsilon() == eps
+        !*/
+
+        const scalar_type get_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the error epsilon that determines when training should stop.
+                  Smaller values may result in a more accurate solution but take longer to
+                  train.  You can think of this epsilon value as saying "solve the
+                  optimization problem until the probability of misclassification is within
+                  epsilon of its optimal value".
+        !*/
+
+        void set_max_iterations (
+            unsigned long max_iter
+        );
+        /*!
+ ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the SVM problem. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object. Since + the linear kernels don't have any parameters this function just + returns kernel_type() + !*/ + + bool learns_nonnegative_weights ( + ) const; + /*! + ensures + - The output of training is a weight vector and a bias value. These + two things define the resulting decision function. That is, the + decision function simply takes the dot product between the learned + weight vector and a test sample, then subtracts the bias value. + Therefore, if learns_nonnegative_weights() == true then the resulting + learned weight vector will always have non-negative entries. The + bias value may still be negative though. + !*/ + + void set_learns_nonnegative_weights ( + bool value + ); + /*! + ensures + - #learns_nonnegative_weights() == value + - if (value == true) then + - #has_prior() == false + !*/ + + void set_prior ( + const trained_function_type& prior + ); + /*! + requires + - prior == a function produced by a call to this class's train() function. 
+ Therefore, it must be the case that: + - prior.basis_vectors.size() == 1 + - prior.alpha(0) == 1 + ensures + - Subsequent calls to train() will try to learn a function similar to the + given prior. + - #has_prior() == true + - #learns_nonnegative_weights() == false + - #forces_last_weight_to_1() == false + !*/ + + bool has_prior ( + ) const + /*! + ensures + - returns true if a prior has been set and false otherwise. Having a prior + set means that you have called set_prior() and supplied a previously + trained function as a reference. In this case, any call to train() will + try to learn a function that matches the behavior of the prior as close + as possible but also fits the supplied training data. In more technical + detail, having a prior means we replace the ||w||^2 regularizer with one + of the form ||w-prior||^2 where w is the set of parameters for a learned + function. + !*/ + + bool forces_last_weight_to_1 ( + ) const; + /*! + ensures + - returns true if this trainer has the constraint that the last weight in + the learned parameter vector must be 1. This is the weight corresponding + to the feature in the training vectors with the highest dimension. + - Forcing the last weight to 1 also disables the bias and therefore the b + field of the learned decision_function will be 0 when forces_last_weight_to_1() == true. + !*/ + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ); + /*! + ensures + - #forces_last_weight_to_1() == should_last_weight_be_1 + - if (should_last_weight_be_1 == true) then + - #has_prior() == false + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + - #get_c_class2() == C + !*/ + + const scalar_type get_c_class1 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the +1 class. 
+ It is the parameter that determines the trade off between + trying to fit the +1 training data exactly or allowing more errors + but hopefully improving the generalization of the resulting + classifier. Larger values encourage exact fitting while + smaller values of C may encourage better generalization. + !*/ + + const scalar_type get_c_class2 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the -1 class. + It is the parameter that determines the trade off between + trying to fit the -1 training data exactly or allowing more errors + but hopefully improving the generalization of the resulting + classifier. Larger values encourage exact fitting while + smaller values of C may encourage better generalization. + !*/ + + void set_c_class1 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + !*/ + + void set_c_class2 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class2() == C + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - is_learning_problem(x,y) == true + (Note that it is ok for x.size() == 1) + - All elements of y must be equal to +1 or -1 + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + - if (has_prior()) then + - The vectors in x must have the same dimensionality as the vectors + used to train the prior given to set_prior(). + ensures + - trains a C support vector classifier given the training samples in x and + labels in y. 
+ - returns a decision function F with the following properties: + - F.alpha.size() == 1 + - F.basis_vectors.size() == 1 + - F.alpha(0) == 1 + - if (new_x is a sample predicted have +1 label) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + scalar_type& svm_objective + ) const; + /*! + requires + - is_learning_problem(x,y) == true + (Note that it is ok for x.size() == 1) + - All elements of y must be equal to +1 or -1 + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + - if (has_prior()) then + - The vectors in x must have the same dimensionality as the vectors + used to train the prior given to set_prior(). + ensures + - trains a C support vector classifier given the training samples in x and + labels in y. + - #svm_objective == the final value of the SVM objective function + - returns a decision function F with the following properties: + - F.alpha.size() == 1 + - F.basis_vectors.size() == 1 + - F.alpha(0) == 1 + - if (new_x is a sample predicted have +1 label) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + !*/ + + }; + +} + +#endif // DLIB_SVM_C_LiNEAR_TRAINER_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/svm_c_trainer.h b/ml/dlib/dlib/svm/svm_c_trainer.h new file mode 100644 index 000000000..14dcf3482 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_c_trainer.h @@ -0,0 +1,359 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SVm_C_TRAINER_Hh_ +#define DLIB_SVm_C_TRAINER_Hh_ + +//#include "local/make_label_kernel_matrix.h" + +#include "svm_c_trainer_abstract.h" +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix.h" +#include "../algs.h" + +#include "function.h" +#include "kernel.h" +#include "../optimization/optimization_solve_qp3_using_smo.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_c_trainer + { + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_c_trainer ( + ) : + Cpos(1), + Cneg(1), + cache_size(200), + eps(0.001) + { + } + + svm_c_trainer ( + const kernel_type& kernel_, + const scalar_type& C_ + ) : + kernel_function(kernel_), + Cpos(C_), + Cneg(C_), + cache_size(200), + eps(0.001) + { + // make sure requires clause is not broken + DLIB_ASSERT(0 < C_, + "\tsvm_c_trainer::svm_c_trainer(kernel,C)" + << "\n\t invalid inputs were given to this function" + << "\n\t C_: " << C_ + ); + } + + void set_cache_size ( + long cache_size_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(cache_size_ > 0, + "\tvoid svm_c_trainer::set_cache_size(cache_size_)" + << "\n\t invalid inputs were given to this function" + << "\n\t cache_size: " << cache_size_ + ); + cache_size = cache_size_; + } + + long get_cache_size ( + ) const + { + return cache_size; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\tvoid svm_c_trainer::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps_: " << eps_ + ); + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const + { + return eps; + } + + void 
set_kernel ( + const kernel_type& k + ) + { + kernel_function = k; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel_function; + } + + void set_c ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_trainer::set_c()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + Cneg = C; + } + + const scalar_type get_c_class1 ( + ) const + { + return Cpos; + } + + const scalar_type get_c_class2 ( + ) const + { + return Cneg; + } + + void set_c_class1 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_trainer::set_c_class1()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cpos = C; + } + + void set_c_class2 ( + scalar_type C + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C > 0, + "\t void svm_c_trainer::set_c_class2()" + << "\n\t C must be greater than 0" + << "\n\t C: " << C + << "\n\t this: " << this + ); + + Cneg = C; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + return do_train(mat(x), mat(y)); + } + + void swap ( + svm_c_trainer& item + ) + { + exchange(kernel_function, item.kernel_function); + exchange(Cpos, item.Cpos); + exchange(Cneg, item.Cneg); + exchange(cache_size, item.cache_size); + exchange(eps, item.eps); + } + + private: + + // ------------------------------------------------------------------------------------ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + typedef typename K::scalar_type scalar_type; + typedef typename decision_function<K>::sample_vector_type 
sample_vector_type; + typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; + + // make sure requires clause is not broken + DLIB_ASSERT(is_binary_classification_problem(x,y) == true, + "\tdecision_function svm_c_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y) + ); + + + scalar_vector_type alpha; + + solve_qp3_using_smo<scalar_vector_type> solver; + + solver(symmetric_matrix_cache<float>((diagm(y)*kernel_matrix(kernel_function,x)*diagm(y)), cache_size), + //solver(symmetric_matrix_cache<float>(make_label_kernel_matrix(kernel_matrix(kernel_function,x),y), cache_size), + uniform_matrix<scalar_type>(y.size(),1,-1), + y, + 0, + Cpos, + Cneg, + alpha, + eps); + + scalar_type b; + calculate_b(y,alpha,solver.get_gradient(),Cpos,Cneg,b); + alpha = pointwise_multiply(alpha,y); + + // count the number of support vectors + const long sv_count = (long)sum(alpha != 0); + + scalar_vector_type sv_alpha; + sample_vector_type support_vectors; + + // size these column vectors so that they have an entry for each support vector + sv_alpha.set_size(sv_count); + support_vectors.set_size(sv_count); + + // load the support vectors and their alpha values into these new column matrices + long idx = 0; + for (long i = 0; i < alpha.nr(); ++i) + { + if (alpha(i) != 0) + { + sv_alpha(idx) = alpha(i); + support_vectors(idx) = x(i); + ++idx; + } + } + + // now return the decision function + return decision_function<K> (sv_alpha, b, kernel_function, support_vectors); + } + + // ------------------------------------------------------------------------------------ + + template < + typename scalar_vector_type, + typename scalar_vector_type2 + > + void calculate_b( + const scalar_vector_type2& y, + const scalar_vector_type& alpha, + const 
scalar_vector_type& df, + const scalar_type& Cpos, + const scalar_type& Cneg, + scalar_type& b + ) const + { + using namespace std; + long num_free = 0; + scalar_type sum_free = 0; + + scalar_type upper_bound = -numeric_limits<scalar_type>::infinity(); + scalar_type lower_bound = numeric_limits<scalar_type>::infinity(); + + for(long i = 0; i < alpha.nr(); ++i) + { + if(y(i) == 1) + { + if(alpha(i) == Cpos) + { + if (df(i) > upper_bound) + upper_bound = df(i); + } + else if(alpha(i) == 0) + { + if (df(i) < lower_bound) + lower_bound = df(i); + } + else + { + ++num_free; + sum_free += df(i); + } + } + else + { + if(alpha(i) == Cneg) + { + if (-df(i) < lower_bound) + lower_bound = -df(i); + } + else if(alpha(i) == 0) + { + if (-df(i) > upper_bound) + upper_bound = -df(i); + } + else + { + ++num_free; + sum_free -= df(i); + } + } + } + + if(num_free > 0) + b = sum_free/num_free; + else + b = (upper_bound+lower_bound)/2; + } + + // ------------------------------------------------------------------------------------ + + + kernel_type kernel_function; + scalar_type Cpos; + scalar_type Cneg; + long cache_size; + scalar_type eps; + }; // end of class svm_c_trainer + +// ---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + svm_c_trainer<K>& a, + svm_c_trainer<K>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_C_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/svm_c_trainer_abstract.h b/ml/dlib/dlib/svm/svm_c_trainer_abstract.h new file mode 100644 index 000000000..696cccdb7 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_c_trainer_abstract.h @@ -0,0 +1,237 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_SVm_C_TRAINER_ABSTRACT_ +#ifdef DLIB_SVm_C_TRAINER_ABSTRACT_ + +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "../optimization/optimization_solve_qp3_using_smo_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_c_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for a C support vector machine for + solving binary classification problems. It is implemented using the SMO + algorithm. + + The implementation of the C-SVM training algorithm used by this object is based + on the following paper: + - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector + machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm + + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_c_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. + - #get_c_class1() == 1 + - #get_c_class2() == 1 + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + svm_c_trainer ( + const kernel_type& kernel, + const scalar_type& C + ); + /*! + requires + - 0 < C + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. 
+ - #get_kernel() == kernel + - #get_c_class1() == C + - #get_c_class2() == C + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + void set_cache_size ( + long cache_size + ); + /*! + requires + - cache_size > 0 + ensures + - #get_cache_size() == cache_size + !*/ + + const long get_cache_size ( + ) const; + /*! + ensures + - returns the number of megabytes of cache this object will use + when it performs training via the this->train() function. + (bigger values of this may make training go faster but won't affect + the result. However, too big a value will cause you to run out of + memory, obviously.) + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Generally a good value for this is 0.001. Smaller values may result + in a more accurate solution but take longer to execute. + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + - #get_c_class2() == C + !*/ + + const scalar_type get_c_class1 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the +1 class. + It is the parameter that determines the trade off between + trying to fit the +1 training data exactly or allowing more errors + but hopefully improving the generalization ability of the + resulting classifier. Larger values encourage exact fitting + while smaller values of C may encourage better generalization. + !*/ + + const scalar_type get_c_class2 ( + ) const; + /*! + ensures + - returns the SVM regularization parameter for the -1 class. 
+ It is the parameter that determines the trade off between + trying to fit the -1 training data exactly or allowing more errors + but hopefully improving the generalization ability of the + resulting classifier. Larger values encourage exact fitting + while smaller values of C may encourage better generalization. + !*/ + + void set_c_class1 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class1() == C + !*/ + + void set_c_class2 ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c_class2() == C + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - is_binary_classification_problem(x,y) == true + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + ensures + - trains a C support vector classifier given the training samples in x and + labels in y. Training is done when the error is less than get_epsilon(). + - returns a decision function F with the following properties: + - if (new_x is a sample predicted have +1 label) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + !*/ + + void swap ( + svm_c_trainer& item + ); + /*! + ensures + - swaps *this and item + !*/ + }; + + template <typename K> + void swap ( + svm_c_trainer<K>& a, + svm_c_trainer<K>& b + ) { a.swap(b); } + /*! 
+ provides a global swap + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_C_TRAINER_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h new file mode 100644 index 000000000..4727f7226 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer.h @@ -0,0 +1,432 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_ +#define DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_ + +#include "svm_multiclass_linear_trainer_abstract.h" +#include "structural_svm_problem_threaded.h" +#include <vector> +#include "../optimization/optimization_oca.h" +#include "../matrix.h" +#include "sparse_vector.h" +#include "function.h" +#include <algorithm> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type, + typename sample_type, + typename label_type + > + class multiclass_svm_problem : public structural_svm_problem_threaded<matrix_type, + std::vector<std::pair<unsigned long,typename matrix_type::type> > > + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the optimization problem for the multiclass SVM trainer + object at the bottom of this file. + + The joint feature vectors used by this object, the PSI(x,y) vectors, are + defined as follows: + PSI(x,0) = [x,0,0,0,0, ...,0] + PSI(x,1) = [0,x,0,0,0, ...,0] + PSI(x,2) = [0,0,x,0,0, ...,0] + That is, if there are N labels then the joint feature vector has a + dimension that is N times the dimension of a single x sample. Also, + note that we append a -1 value onto each x to account for the bias term. 
+ !*/ + + public: + typedef typename matrix_type::type scalar_type; + typedef std::vector<std::pair<unsigned long,scalar_type> > feature_vector_type; + + multiclass_svm_problem ( + const std::vector<sample_type>& samples_, + const std::vector<label_type>& labels_, + const std::vector<label_type>& distinct_labels_, + const unsigned long dims_, + const unsigned long num_threads + ) : + structural_svm_problem_threaded<matrix_type, std::vector<std::pair<unsigned long,typename matrix_type::type> > >(num_threads), + samples(samples_), + labels(labels_), + distinct_labels(distinct_labels_), + dims(dims_+1) // +1 for the bias + {} + + virtual long get_num_dimensions ( + ) const + { + return dims*distinct_labels.size(); + } + + virtual long get_num_samples ( + ) const + { + return static_cast<long>(samples.size()); + } + + virtual void get_truth_joint_feature_vector ( + long idx, + feature_vector_type& psi + ) const + { + assign(psi, samples[idx]); + // Add a constant -1 to account for the bias term. + psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1))); + + // Find which distinct label goes with this psi. + long label_idx = 0; + for (unsigned long i = 0; i < distinct_labels.size(); ++i) + { + if (distinct_labels[i] == labels[idx]) + { + label_idx = i; + break; + } + } + + offset_feature_vector(psi, dims*label_idx); + } + + virtual void separation_oracle ( + const long idx, + const matrix_type& current_solution, + scalar_type& loss, + feature_vector_type& psi + ) const + { + scalar_type best_val = -std::numeric_limits<scalar_type>::infinity(); + unsigned long best_idx = 0; + + // Figure out which label is the best. That is, what label maximizes + // LOSS(idx,y) + F(x,y). Note that y in this case is given by distinct_labels[i]. 
+ for (unsigned long i = 0; i < distinct_labels.size(); ++i) + { + // Compute the F(x,y) part: + // perform: temp == dot(relevant part of current solution, samples[idx]) - current_bias + scalar_type temp = dot(mat(&current_solution(i*dims),dims-1), samples[idx]) - current_solution((i+1)*dims-1); + + // Add the LOSS(idx,y) part: + if (labels[idx] != distinct_labels[i]) + temp += 1; + + // Now temp == LOSS(idx,y) + F(x,y). Check if it is the biggest we have seen. + if (temp > best_val) + { + best_val = temp; + best_idx = i; + } + } + + assign(psi, samples[idx]); + // add a constant -1 to account for the bias term + psi.push_back(std::make_pair(dims-1,static_cast<scalar_type>(-1))); + + offset_feature_vector(psi, dims*best_idx); + + if (distinct_labels[best_idx] == labels[idx]) + loss = 0; + else + loss = 1; + } + + private: + + void offset_feature_vector ( + feature_vector_type& sample, + const unsigned long val + ) const + { + if (val != 0) + { + for (typename feature_vector_type::iterator i = sample.begin(); i != sample.end(); ++i) + { + i->first += val; + } + } + } + + + const std::vector<sample_type>& samples; + const std::vector<label_type>& labels; + const std::vector<label_type>& distinct_labels; + const long dims; + }; + + +// ---------------------------------------------------------------------------------------- + + template < + typename K, + typename label_type_ = typename K::scalar_type + > + class svm_multiclass_linear_trainer + { + public: + typedef label_type_ label_type; + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + + typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type; + + + // You are getting a compiler error on this line because you supplied a non-linear kernel + // to the svm_multiclass_linear_trainer object. 
You have to use one of the linear kernels with this + // trainer. + COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || + is_same_type<K, sparse_linear_kernel<sample_type> >::value )); + + svm_multiclass_linear_trainer ( + ) : + num_threads(4), + C(1), + eps(0.001), + max_iterations(10000), + verbose(false), + learn_nonnegative_weights(false) + { + } + + void set_num_threads ( + unsigned long num + ) + { + num_threads = num; + } + + unsigned long get_num_threads ( + ) const + { + return num_threads; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void svm_multiclass_linear_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const { return eps; } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + const kernel_type get_kernel ( + ) const + { + return kernel_type(); + } + + bool learns_nonnegative_weights ( + ) const { return learn_nonnegative_weights; } + + void set_learns_nonnegative_weights ( + bool value + ) + { + learn_nonnegative_weights = value; + if (learn_nonnegative_weights) + prior = trained_function_type(); + } + + void set_c ( + scalar_type C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void svm_multiclass_linear_trainer::set_c()" + << "\n\t C must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + const scalar_type get_c ( + ) const + { + return C; + } + + void set_prior ( + const 
trained_function_type& prior_ + ) + { + prior = prior_; + learn_nonnegative_weights = false; + } + + bool has_prior ( + ) const + { + return prior.labels.size() != 0; + } + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels + ) const + { + scalar_type svm_objective = 0; + return train(all_samples, all_labels, svm_objective); + } + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels, + scalar_type& svm_objective + ) const + { + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(all_samples,all_labels), + "\t trained_function_type svm_multiclass_linear_trainer::train(all_samples,all_labels)" + << "\n\t invalid inputs were given to this function" + << "\n\t all_samples.size(): " << all_samples.size() + << "\n\t all_labels.size(): " << all_labels.size() + ); + + trained_function_type df; + df.labels = select_all_distinct_labels(all_labels); + if (has_prior()) + { + df.labels.insert(df.labels.end(), prior.labels.begin(), prior.labels.end()); + df.labels = select_all_distinct_labels(df.labels); + } + const long input_sample_dimensionality = max_index_plus_one(all_samples); + // If the samples are sparse then the right thing to do is to take the max + // dimensionality between the prior and the new samples. But if the samples + // are dense vectors then they definitely all have to have exactly the same + // dimensionality. + const long dims = std::max(df.weights.nc(),input_sample_dimensionality); + if (is_matrix<sample_type>::value && has_prior()) + { + DLIB_ASSERT(input_sample_dimensionality == prior.weights.nc(), + "\t trained_function_type svm_multiclass_linear_trainer::train(all_samples,all_labels)" + << "\n\t The training samples given to this function are not the same kind of training " + << "\n\t samples used to create the prior." 
+ << "\n\t input_sample_dimensionality: " << input_sample_dimensionality + << "\n\t prior.weights.nc(): " << prior.weights.nc() + ); + } + + typedef matrix<scalar_type,0,1> w_type; + w_type weights; + multiclass_svm_problem<w_type, sample_type, label_type> problem(all_samples, all_labels, df.labels, dims, num_threads); + if (verbose) + problem.be_verbose(); + + problem.set_max_cache_size(0); + problem.set_c(C); + problem.set_epsilon(eps); + problem.set_max_iterations(max_iterations); + + unsigned long num_nonnegative = 0; + if (learn_nonnegative_weights) + { + num_nonnegative = problem.get_num_dimensions(); + } + + if (!has_prior()) + { + svm_objective = solver(problem, weights, num_nonnegative); + } + else + { + matrix<scalar_type> temp(df.labels.size(),dims); + w_type b(df.labels.size()); + temp = 0; + b = 0; + + const long pad_size = dims-prior.weights.nc(); + // Copy the prior into the temp and b matrices. We have to do this row + // by row copy because the new training data might have new labels we + // haven't seen before and therefore the sizes of these matrices could be + // different. 
+ for (unsigned long i = 0; i < prior.labels.size(); ++i) + { + const long r = std::find(df.labels.begin(), df.labels.end(), prior.labels[i])-df.labels.begin(); + set_rowm(temp,r) = join_rows(rowm(prior.weights,i), zeros_matrix<scalar_type>(1,pad_size)); + b(r) = prior.b(i); + } + + const w_type prior_vect = reshape_to_column_vector(join_rows(temp,b)); + svm_objective = solver(problem, weights, prior_vect); + } + + + df.weights = colm(reshape(weights, df.labels.size(), dims+1), range(0,dims-1)); + df.b = colm(reshape(weights, df.labels.size(), dims+1), dims); + return df; + } + + private: + + unsigned long num_threads; + scalar_type C; + scalar_type eps; + unsigned long max_iterations; + bool verbose; + oca solver; + bool learn_nonnegative_weights; + + trained_function_type prior; + }; + +// ---------------------------------------------------------------------------------------- + +} + + +#endif // DLIB_SVm_MULTICLASS_LINEAR_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/svm_multiclass_linear_trainer_abstract.h b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer_abstract.h new file mode 100644 index 000000000..6561ce7b2 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_multiclass_linear_trainer_abstract.h @@ -0,0 +1,275 @@ +// Copyright (C) 2011 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_ABSTRACT_Hh_ +#ifdef DLIB_SVm_MULTICLASS_LINEAR_TRAINER_ABSTRACT_Hh_ + +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "sparse_kernel_abstract.h" +#include "../optimization/optimization_oca_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K, + typename label_type_ = typename K::scalar_type + > + class svm_multiclass_linear_trainer + { + /*! 
+ REQUIREMENTS ON K + Is either linear_kernel or sparse_linear_kernel. + + REQUIREMENTS ON label_type_ + label_type_ must be default constructable, copyable, and comparable using + operator < and ==. It must also be possible to write it to an std::ostream + using operator<<. + + INITIAL VALUE + - get_num_threads() == 4 + - learns_nonnegative_weights() == false + - get_epsilon() == 0.001 + - get_max_iterations() == 10000 + - get_c() == 1 + - this object will not be verbose unless be_verbose() is called + - #get_oca() == oca() (i.e. an instance of oca with default parameters) + - has_prior() == false + + WHAT THIS OBJECT REPRESENTS + This object represents a tool for training a multiclass support + vector machine. It is optimized for the case where linear kernels + are used. + !*/ + + public: + typedef label_type_ label_type; + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef multiclass_linear_decision_function<kernel_type, label_type> trained_function_type; + + svm_multiclass_linear_trainer ( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer + to execute. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + unsigned long get_max_iterations ( + ); + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void be_verbose ( + ); + /*! 
+ ensures + - This object will print status messages to standard out so that a + user can observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the SVM problem. + !*/ + + void set_num_threads ( + unsigned long num + ); + /*! + ensures + - #get_num_threads() == num + !*/ + + unsigned long get_num_threads ( + ) const; + /*! + ensures + - returns the number of threads used during training. You should + usually set this equal to the number of processing cores on your + machine. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object. Since + the linear kernels don't have any parameters this function just + returns kernel_type() + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c() == C + !*/ + + const scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data + exactly or allowing more errors but hopefully improving the + generalization of the resulting classifier. Larger values encourage + exact fitting while smaller values of C may encourage better + generalization. + !*/ + + bool learns_nonnegative_weights ( + ) const; + /*! + ensures + - The output of training is a set of weights and bias values that together + define the behavior of a multiclass_linear_decision_function object. If + learns_nonnegative_weights() == true then the resulting weights and bias + values will always have non-negative values. 
That is, if this function + returns true then all the numbers in the multiclass_linear_decision_function + objects output by train() will be non-negative. + !*/ + + void set_learns_nonnegative_weights ( + bool value + ); + /*! + ensures + - #learns_nonnegative_weights() == value + - if (value == true) then + - #has_prior() == false + !*/ + + void set_prior ( + const trained_function_type& prior + ); + /*! + ensures + - Subsequent calls to train() will try to learn a function similar to the + given prior. + - #has_prior() == true + - #learns_nonnegative_weights() == false + !*/ + + bool has_prior ( + ) const + /*! + ensures + - returns true if a prior has been set and false otherwise. Having a prior + set means that you have called set_prior() and supplied a previously + trained function as a reference. In this case, any call to train() will + try to learn a function that matches the behavior of the prior as close + as possible but also fits the supplied training data. In more technical + detail, having a prior means we replace the ||w||^2 regularizer with one + of the form ||w-prior||^2 where w is the set of parameters for a learned + function. + !*/ + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels + ) const; + /*! + requires + - is_learning_problem(all_samples, all_labels) + - All the vectors in all_samples must have the same dimensionality. + - if (has_prior()) then + - The vectors in all_samples must have the same dimensionality as the + vectors used to train the prior given to set_prior(). + ensures + - trains a multiclass SVM to solve the given multiclass classification problem. 
+ - returns a multiclass_linear_decision_function F with the following properties: + - if (new_x is a sample predicted to have a label of L) then + - F(new_x) == L + - F.get_labels() == select_all_distinct_labels(all_labels) + - F.number_of_classes() == select_all_distinct_labels(all_labels).size() + !*/ + + trained_function_type train ( + const std::vector<sample_type>& all_samples, + const std::vector<label_type>& all_labels, + scalar_type& svm_objective + ) const; + /*! + requires + - is_learning_problem(all_samples, all_labels) + - All the vectors in all_samples must have the same dimensionality. + - if (has_prior()) then + - The vectors in all_samples must have the same dimensionality as the + vectors used to train the prior given to set_prior(). + ensures + - trains a multiclass SVM to solve the given multiclass classification problem. + - returns a multiclass_linear_decision_function F with the following properties: + - if (new_x is a sample predicted to have a label of L) then + - F(new_x) == L + - F.get_labels() == select_all_distinct_labels(all_labels) + - F.number_of_classes() == select_all_distinct_labels(all_labels).size() + - #svm_objective == the final value of the SVM objective function + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + + +#endif // DLIB_SVm_MULTICLASS_LINEAR_TRAINER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/svm_nu_trainer.h b/ml/dlib/dlib/svm/svm_nu_trainer.h new file mode 100644 index 000000000..1e89d6efa --- /dev/null +++ b/ml/dlib/dlib/svm/svm_nu_trainer.h @@ -0,0 +1,326 @@ +// Copyright (C) 2007 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
#ifndef DLIB_SVm_NU_TRAINER_Hh_
#define DLIB_SVm_NU_TRAINER_Hh_

//#include "local/make_label_kernel_matrix.h"

#include "svm_nu_trainer_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"
#include "../serialize.h"

#include "function.h"
#include "kernel.h"
#include "../optimization/optimization_solve_qp2_using_smo.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    /*
        Trainer for a nu support vector machine for binary classification,
        implemented on top of the SMO-based QP solver in
        optimization_solve_qp2_using_smo.h.  See svm_nu_trainer_abstract.h for
        the formal contract of every member.
    */
    template <
        typename K
        >
    class svm_nu_trainer
    {
    public:
        typedef K kernel_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;
        typedef decision_function<kernel_type> trained_function_type;

        // Defaults: nu == 0.1, 200 megabyte kernel cache, stopping epsilon == 0.001.
        svm_nu_trainer (
        ) :
            nu(0.1),
            cache_size(200),
            eps(0.001)
        {
        }

        svm_nu_trainer (
            const kernel_type& kernel_,
            const scalar_type& nu_
        ) :
            kernel_function(kernel_),
            nu(nu_),
            cache_size(200),
            eps(0.001)
        {
            // make sure requires clause is not broken (requires 0 < nu <= 1)
            DLIB_ASSERT(0 < nu && nu <= 1,
                "\tsvm_nu_trainer::svm_nu_trainer(kernel,nu)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t nu: " << nu
                );
        }

        // Sets the size, in megabytes, of the kernel matrix cache used by train().
        void set_cache_size (
            long cache_size_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(cache_size_ > 0,
                "\tvoid svm_nu_trainer::set_cache_size(cache_size_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t cache_size: " << cache_size_
                );
            cache_size = cache_size_;
        }

        long get_cache_size (
        ) const
        {
            return cache_size;
        }

        // Sets the solver's stopping tolerance.  Smaller values give a more
        // accurate solution but take longer to converge.
        void set_epsilon (
            scalar_type eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\tvoid svm_nu_trainer::set_epsilon(eps_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t eps: " << eps_
                );
            eps = eps_;
        }

        const scalar_type get_epsilon (
        ) const
        {
            return eps;
        }

        void set_kernel (
            const kernel_type& k
        )
        {
            kernel_function = k;
        }

        const kernel_type& get_kernel (
        ) const
        {
            return kernel_function;
        }

        // nu trades off exact fitting of the training data against
        // generalization; it must satisfy 0 < nu <= 1.
        void set_nu (
            scalar_type nu_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(0 < nu_ && nu_ <= 1,
                "\tvoid svm_nu_trainer::set_nu(nu_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t nu: " << nu_
                );
            nu = nu_;
        }

        const scalar_type get_nu (
        ) const
        {
            return nu;
        }

        // Trains on the samples in x with +1/-1 labels in y and returns the
        // learned decision function.  x and y may be anything convertible to a
        // matrix via mat().
        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            return do_train(mat(x), mat(y));
        }

        void swap (
            svm_nu_trainer& item
        )
        {
            exchange(kernel_function, item.kernel_function);
            exchange(nu,              item.nu);
            exchange(cache_size,      item.cache_size);
            exchange(eps,             item.eps);
        }

    private:

    // ------------------------------------------------------------------------------------

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> do_train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const
        {
            typedef typename K::scalar_type scalar_type;
            typedef typename decision_function<K>::sample_vector_type sample_vector_type;
            typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;

            // make sure requires clause is not broken
            DLIB_ASSERT(is_binary_classification_problem(x,y) == true,
                "\tdecision_function svm_nu_trainer::train(x,y)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.nr(): " << x.nr()
                << "\n\t y.nr(): " << y.nr()
                << "\n\t x.nc(): " << x.nc()
                << "\n\t y.nc(): " << y.nc()
                << "\n\t is_binary_classification_problem(x,y): " << is_binary_classification_problem(x,y)
                );


            scalar_vector_type alpha;

            solve_qp2_using_smo<scalar_vector_type> solver;

            // Solve the nu-SVM dual QP over the label-signed kernel matrix
            // D*K*D (D = diagm(y)).  The cached matrix entries are stored as
            // float to reduce the memory footprint of the cache.
            solver(symmetric_matrix_cache<float>((diagm(y)*kernel_matrix(kernel_function,x)*diagm(y)), cache_size),
            //solver(symmetric_matrix_cache<float>(make_label_kernel_matrix(kernel_matrix(kernel_function,x),y), cache_size),
                   y,
                   nu,
                   alpha,
                   eps);

            scalar_type rho, b;
            calculate_rho_and_b(y,alpha,solver.get_gradient(),rho,b);
            // Fold the labels into the dual variables and rescale by rho so the
            // returned decision function uses the conventional +/-1 threshold.
            alpha = pointwise_multiply(alpha,y)/rho;

            // count the number of support vectors
            const long sv_count = (long)sum(alpha != 0);

            scalar_vector_type sv_alpha;
            sample_vector_type support_vectors;

            // size these column vectors so that they have an entry for each support vector
            sv_alpha.set_size(sv_count);
            support_vectors.set_size(sv_count);

            // load the support vectors and their alpha values into these new column matrices
            long idx = 0;
            for (long i = 0; i < alpha.nr(); ++i)
            {
                if (alpha(i) != 0)
                {
                    sv_alpha(idx) = alpha(i);
                    support_vectors(idx) = x(i);
                    ++idx;
                }
            }

            // now return the decision function
            return decision_function<K> (sv_alpha, b, kernel_function, support_vectors);
        }

    // ------------------------------------------------------------------------------------

        /*
            Computes the rho and b offsets of the nu-SVM from the solver's final
            dual gradient df, following the procedure described in the paper
            cited by svm_nu_trainer_abstract.h (Chang and Lin, "Training
            nu-Support Vector Classifiers: Theory and Algorithms").
        */
        template <
            typename scalar_vector_type,
            typename scalar_vector_type2,
            typename scalar_type
            >
        void calculate_rho_and_b(
            const scalar_vector_type2& y,
            const scalar_vector_type& alpha,
            const scalar_vector_type& df,
            scalar_type& rho,
            scalar_type& b
        ) const
        {
            using namespace std;
            long num_p_free = 0;
            long num_n_free = 0;
            scalar_type sum_p_free = 0;
            scalar_type sum_n_free = 0;

            scalar_type upper_bound_p = -numeric_limits<scalar_type>::infinity();
            scalar_type upper_bound_n = -numeric_limits<scalar_type>::infinity();
            scalar_type lower_bound_p = numeric_limits<scalar_type>::infinity();
            scalar_type lower_bound_n = numeric_limits<scalar_type>::infinity();

            // Partition the gradient entries by label and by whether the
            // corresponding alpha sits at a bound (0 or 1) or is free.
            for(long i = 0; i < alpha.nr(); ++i)
            {
                if(y(i) == 1)
                {
                    if(alpha(i) == 1)
                    {
                        if (df(i) > upper_bound_p)
                            upper_bound_p = df(i);
                    }
                    else if(alpha(i) == 0)
                    {
                        if (df(i) < lower_bound_p)
                            lower_bound_p = df(i);
                    }
                    else
                    {
                        ++num_p_free;
                        sum_p_free += df(i);
                    }
                }
                else
                {
                    if(alpha(i) == 1)
                    {
                        if (df(i) > upper_bound_n)
                            upper_bound_n = df(i);
                    }
                    else if(alpha(i) == 0)
                    {
                        if (df(i) < lower_bound_n)
                            lower_bound_n = df(i);
                    }
                    else
                    {
                        ++num_n_free;
                        sum_n_free += df(i);
                    }
                }
            }

            // r1/r2 are the per-class offsets: the mean gradient over the free
            // variables when any exist, otherwise the midpoint of the bounds.
            scalar_type r1,r2;
            if(num_p_free > 0)
                r1 = sum_p_free/num_p_free;
            else
                r1 = (upper_bound_p+lower_bound_p)/2;

            if(num_n_free > 0)
                r2 = sum_n_free/num_n_free;
            else
                r2 = (upper_bound_n+lower_bound_n)/2;

            rho = (r1+r2)/2;
            b = (r1-r2)/2/rho;
        }

    // ------------------------------------------------------------------------------------

        kernel_type kernel_function;
        scalar_type nu;              // fraction parameter, 0 < nu <= 1
        long cache_size;             // kernel cache size in megabytes
        scalar_type eps;             // solver stopping tolerance
    }; // end of class svm_nu_trainer

// ----------------------------------------------------------------------------------------

    // provides a global swap
    template <typename K>
    void swap (
        svm_nu_trainer<K>& a,
        svm_nu_trainer<K>& b
    ) { a.swap(b); }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SVm_NU_TRAINER_Hh_

// ==== diff: new file ml/dlib/dlib/svm/svm_nu_trainer_abstract.h (index 000000000..5ae0fba4a, @@ -0,0 +1,210 @@) ====
// Copyright (C) 2007  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
+#undef DLIB_SVm_NU_TRAINER_ABSTRACT_ +#ifdef DLIB_SVm_NU_TRAINER_ABSTRACT_ + +#include <cmath> +#include <limits> +#include <sstream> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "../serialize.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "../optimization/optimization_solve_qp2_using_smo_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_nu_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for a nu support vector machine for + solving binary classification problems. It is implemented using the SMO + algorithm. + + The implementation of the nu-svm training algorithm used by this object is based + on the following excellent papers: + - Chang and Lin, Training {nu}-Support Vector Classifiers: Theory and Algorithms + - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector + machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm + + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_nu_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. + - #get_nu() == 0.1 + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + svm_nu_trainer ( + const kernel_type& kernel, + const scalar_type& nu + ); + /*! + requires + - 0 < nu <= 1 + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. 
+ - #get_kernel() == kernel + - #get_nu() == nu + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + void set_cache_size ( + long cache_size + ); + /*! + requires + - cache_size > 0 + ensures + - #get_cache_size() == cache_size + !*/ + + const long get_cache_size ( + ) const; + /*! + ensures + - returns the number of megabytes of cache this object will use + when it performs training via the this->train() function. + (bigger values of this may make training go faster but won't affect + the result. However, too big a value will cause you to run out of + memory, obviously.) + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Generally a good value for this is 0.001. Smaller values may result + in a more accurate solution but take longer to execute. + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_nu ( + scalar_type nu + ); + /*! + requires + - 0 < nu <= 1 + ensures + - #get_nu() == nu + !*/ + + const scalar_type get_nu ( + ) const; + /*! + ensures + - returns the nu svm parameter. This is a value between 0 and + 1. It is the parameter that determines the trade off between + trying to fit the training data exactly or allowing more errors + but hopefully improving the generalization ability of the + resulting classifier. Smaller values encourage exact fitting + while larger values of nu may encourage better generalization. + For more information you should consult the papers referenced + above. 
        !*/

        template <
            typename in_sample_vector_type,
            typename in_scalar_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x,
            const in_scalar_vector_type& y
        ) const;
        /*!
            requires
                - is_binary_classification_problem(x,y) == true
                - x == a matrix or something convertible to a matrix via mat().
                  Also, x should contain sample_type objects.
                - y == a matrix or something convertible to a matrix via mat().
                  Also, y should contain scalar_type objects.
            ensures
                - trains a nu support vector classifier given the training samples in x and
                  labels in y.  Training is done when the error is less than get_epsilon().
                - returns a decision function F with the following properties:
                    - if (new_x is a sample predicted to have a +1 label) then
                        - F(new_x) >= 0
                    - else
                        - F(new_x) < 0
            throws
                - invalid_nu_error
                  This exception is thrown if get_nu() >= maximum_nu(y)
                - std::bad_alloc
        !*/

        void swap (
            svm_nu_trainer& item
        );
        /*!
            ensures
                - swaps *this and item
        !*/
    };

    template <typename K>
    void swap (
        svm_nu_trainer<K>& a,
        svm_nu_trainer<K>& b
    ) { a.swap(b); }
    /*!
        provides a global swap
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SVm_NU_TRAINER_ABSTRACT_



// ==== diff: new file ml/dlib/dlib/svm/svm_one_class_trainer.h (index 000000000..be3cc8caf, @@ -0,0 +1,284 @@) ====
// Copyright (C) 2010  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_SVm_ONE_CLASS_TRAINER_Hh_
#define DLIB_SVm_ONE_CLASS_TRAINER_Hh_

#include "svm_one_class_trainer_abstract.h"
#include <cmath>
#include <limits>
#include <sstream>
#include "../matrix.h"
#include "../algs.h"

#include "function.h"
#include "kernel.h"
#include "../optimization/optimization_solve_qp3_using_smo.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    /*
        Trainer for a one-class support vector machine, implemented on top of
        the SMO-based QP solver in optimization_solve_qp3_using_smo.h.  See
        svm_one_class_trainer_abstract.h for the formal contract of every
        member.
    */
    template <
        typename K
        >
    class svm_one_class_trainer
    {
    public:
        typedef K kernel_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;
        typedef decision_function<kernel_type> trained_function_type;

        // Defaults: nu == 0.1, 200 megabyte kernel cache, stopping epsilon == 0.001.
        svm_one_class_trainer (
        ) :
            nu(0.1),
            cache_size(200),
            eps(0.001)
        {
        }

        svm_one_class_trainer (
            const kernel_type& kernel_,
            const scalar_type& nu_
        ) :
            kernel_function(kernel_),
            nu(nu_),
            cache_size(200),
            eps(0.001)
        {
            // make sure requires clause is not broken (requires 0 < nu <= 1)
            DLIB_ASSERT(0 < nu && nu <= 1,
                "\tsvm_one_class_trainer::svm_one_class_trainer(kernel,nu)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t nu: " << nu
                );
        }

        // Sets the size, in megabytes, of the kernel matrix cache used by train().
        void set_cache_size (
            long cache_size_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(cache_size_ > 0,
                "\tvoid svm_one_class_trainer::set_cache_size(cache_size_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t cache_size: " << cache_size_
                );
            cache_size = cache_size_;
        }

        long get_cache_size (
        ) const
        {
            return cache_size;
        }

        // Sets the solver's stopping tolerance.  Smaller values give a more
        // accurate solution but take longer to converge.
        void set_epsilon (
            scalar_type eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\tvoid svm_one_class_trainer::set_epsilon(eps_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t eps: " << eps_
                );
            eps = eps_;
        }

        const scalar_type get_epsilon (
        ) const
        {
            return eps;
        }

        void set_kernel (
            const kernel_type& k
        )
        {
            kernel_function = k;
        }

        const kernel_type& get_kernel (
        ) const
        {
            return kernel_function;
        }

        // nu trades off exact fitting of the training data against
        // generalization; it must satisfy 0 < nu <= 1.
        void set_nu (
            scalar_type nu_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(0 < nu_ && nu_ <= 1,
                "\tvoid svm_one_class_trainer::set_nu(nu_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t nu: " << nu_
                );
            nu = nu_;
        }

        const scalar_type get_nu (
        ) const
        {
            return nu;
        }

        // Trains on the samples in x (a column vector of samples, or anything
        // convertible to one via mat()) and returns the learned decision
        // function.
        template <
            typename in_sample_vector_type
            >
        const decision_function<kernel_type> train (
            const in_sample_vector_type& x
        ) const
        {
            return do_train(mat(x));
        }

        void swap (
            svm_one_class_trainer& item
        )
        {
            exchange(kernel_function, item.kernel_function);
            exchange(nu,              item.nu);
            exchange(cache_size,      item.cache_size);
            exchange(eps,             item.eps);
        }

    private:

    // ------------------------------------------------------------------------------------

        template <
            typename in_sample_vector_type
            >
        const decision_function<kernel_type> do_train (
            const in_sample_vector_type& x
        ) const
        {
            typedef typename K::scalar_type scalar_type;
            typedef typename decision_function<K>::sample_vector_type sample_vector_type;
            typedef typename decision_function<K>::scalar_vector_type scalar_vector_type;

            // make sure requires clause is not broken
            DLIB_ASSERT(is_col_vector(x) && x.size() > 0,
                "\tdecision_function svm_one_class_trainer::train(x)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t x.nr(): " << x.nr()
                << "\n\t x.nc(): " << x.nc()
                );


            scalar_vector_type alpha;

            solve_qp3_using_smo<scalar_vector_type> solver;

            // One-class dual QP: plain kernel matrix, all "labels" equal to 1,
            // box constraint C == 1 on each alpha and sum(alpha) == nu*x.size().
            // The cached matrix entries are stored as float to reduce the
            // memory footprint of the cache.
            solver(symmetric_matrix_cache<float>(kernel_matrix(kernel_function,x), cache_size),
                   zeros_matrix<scalar_type>(x.size(),1),
                   ones_matrix<scalar_type>(x.size(),1),
                   nu*x.size(),
                   1,
                   1,
                   alpha,
                   eps);

            scalar_type rho;
            calculate_rho(alpha,solver.get_gradient(),rho);


            // count the number of support vectors
            const long sv_count = (long)sum(alpha != 0);

            scalar_vector_type sv_alpha;
            sample_vector_type support_vectors;

            // size these column vectors so that they have an entry for each support vector
            sv_alpha.set_size(sv_count);
            support_vectors.set_size(sv_count);

            // load the support vectors and their alpha values into these new column matrices
            long idx = 0;
            for (long i = 0; i < alpha.nr(); ++i)
            {
                if (alpha(i) != 0)
                {
                    sv_alpha(idx) = alpha(i);
                    support_vectors(idx) = x(i);
                    ++idx;
                }
            }

            // now return the decision function
            return decision_function<K> (sv_alpha, rho, kernel_function, support_vectors);
        }

    // ------------------------------------------------------------------------------------

        /*
            Computes the rho threshold from the solver's final dual gradient df:
            the mean gradient over the free dual variables when any exist,
            otherwise the midpoint between the bound categories.
        */
        template <
            typename scalar_vector_type
            >
        void calculate_rho(
            const scalar_vector_type& alpha,
            const scalar_vector_type& df,
            scalar_type& rho
        ) const
        {
            using namespace std;
            long num_p_free = 0;
            scalar_type sum_p_free = 0;


            scalar_type upper_bound_p;
            scalar_type lower_bound_p;

            // NOTE(review): find_min_and_max() assigns min(df) to upper_bound_p
            // and max(df) to lower_bound_p; the loop below then raises/lowers
            // them over the at-bound entries.  This seeds both bounds with
            // finite values even when one category is empty — this mirrors the
            // upstream dlib implementation; confirm against it before changing.
            find_min_and_max(df, upper_bound_p, lower_bound_p);

            for(long i = 0; i < alpha.nr(); ++i)
            {
                if(alpha(i) == 1)
                {
                    if (df(i) > upper_bound_p)
                        upper_bound_p = df(i);
                }
                else if(alpha(i) == 0)
                {
                    if (df(i) < lower_bound_p)
                        lower_bound_p = df(i);
                }
                else
                {
                    ++num_p_free;
                    sum_p_free += df(i);
                }
            }

            scalar_type r1;
            if(num_p_free > 0)
                r1 = sum_p_free/num_p_free;
            else
                r1 = (upper_bound_p+lower_bound_p)/2;

            rho = r1;
        }

        kernel_type kernel_function;
        scalar_type nu;              // fraction parameter, 0 < nu <= 1
        long cache_size;             // kernel cache size in megabytes
        scalar_type eps;             // solver stopping tolerance
    }; // end of class svm_one_class_trainer

// ----------------------------------------------------------------------------------------

    // provides a global swap
    template <typename K>
    void swap (
        svm_one_class_trainer<K>& a,
        svm_one_class_trainer<K>& b
    ) { a.swap(b); }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SVm_ONE_CLASS_TRAINER_Hh_
DLIB_SVm_ONE_CLASS_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/svm_one_class_trainer_abstract.h b/ml/dlib/dlib/svm/svm_one_class_trainer_abstract.h new file mode 100644 index 000000000..6b55919ad --- /dev/null +++ b/ml/dlib/dlib/svm/svm_one_class_trainer_abstract.h @@ -0,0 +1,201 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_ONE_CLASS_TRAINER_ABSTRACT_ +#ifdef DLIB_SVm_ONE_CLASS_TRAINER_ABSTRACT_ + +#include <cmath> +#include <limits> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "../optimization/optimization_solve_qp3_using_smo_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svm_one_class_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for a support vector machine for + solving one-class classification problems. It is implemented using the SMO + algorithm. + + The implementation of the training algorithm used by this object is based + on the following excellent paper: + - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector + machines, 2001. Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm + + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_one_class_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. 
+ - #get_nu() == 0.1 + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + svm_one_class_trainer ( + const kernel_type& kernel, + const scalar_type& nu + ); + /*! + requires + - 0 < nu <= 1 + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. + - #get_kernel() == kernel + - #get_nu() == nu + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + void set_cache_size ( + long cache_size + ); + /*! + requires + - cache_size > 0 + ensures + - #get_cache_size() == cache_size + !*/ + + const long get_cache_size ( + ) const; + /*! + ensures + - returns the number of megabytes of cache this object will use + when it performs training via the this->train() function. + (bigger values of this may make training go faster but won't affect + the result. However, too big a value will cause you to run out of + memory, obviously.) + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Generally a good value for this is 0.001. Smaller values may result + in a more accurate solution but take longer to execute. + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_nu ( + scalar_type nu + ); + /*! + requires + - 0 < nu <= 1 + ensures + - #get_nu() == nu + !*/ + + const scalar_type get_nu ( + ) const; + /*! + ensures + - returns the nu svm parameter. This is a value between 0 and + 1. It is the parameter that determines the trade off between + trying to fit the training data exactly or allowing more errors + but hopefully improving the generalization ability of the + resulting classifier. 
Smaller values encourage exact fitting + while larger values of nu may encourage better generalization. + For more information you should consult the papers referenced + above. + !*/ + + template < + typename in_sample_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x + ) const; + /*! + requires + - x.size() > 0 + - is_col_vector(x) == true + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + ensures + - trains a one-class support vector classifier given the training samples in x. + Training is done when the error is less than get_epsilon(). + - returns a decision function F with the following properties: + - if (new_x is a sample predicted to arise from the distribution + which generated the training samples) then + - F(new_x) >= 0 + - else + - F(new_x) < 0 + !*/ + + void swap ( + svm_one_class_trainer& item + ); + /*! + ensures + - swaps *this and item + !*/ + }; + + template <typename K> + void swap ( + svm_one_class_trainer<K>& a, + svm_one_class_trainer<K>& b + ) { a.swap(b); } + /*! + provides a global swap + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_ONE_CLASS_TRAINER_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/svm_rank_trainer.h b/ml/dlib/dlib/svm/svm_rank_trainer.h new file mode 100644 index 000000000..0be737f48 --- /dev/null +++ b/ml/dlib/dlib/svm/svm_rank_trainer.h @@ -0,0 +1,495 @@ +// Copyright (C) 2012 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
#ifndef DLIB_SVM_RANK_TrAINER_Hh_
#define DLIB_SVM_RANK_TrAINER_Hh_

#include "svm_rank_trainer_abstract.h"

#include "ranking_tools.h"
#include "../algs.h"
#include "../optimization.h"
#include "function.h"
#include "kernel.h"
#include "sparse_vector.h"
#include <iostream>

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename matrix_type,
        typename sample_type
        >
    class oca_problem_ranking_svm : public oca_problem<matrix_type >
    {
    public:
        /*
            This class is used as part of the implementation of the svm_rank_trainer
            defined towards the end of this file.  It presents the ranking SVM risk
            (fraction of margin-violating rank inversions) to the oca optimizer.
        */

        typedef typename matrix_type::type scalar_type;

        oca_problem_ranking_svm(
            const scalar_type C_,
            const std::vector<ranking_pair<sample_type> >& samples_,
            const bool be_verbose_,
            const scalar_type eps_,
            const unsigned long max_iter,
            const unsigned long dims_
        ) :
            samples(samples_),
            C(C_),
            be_verbose(be_verbose_),
            eps(eps_),
            max_iterations(max_iter),
            dims(dims_)
        {
        }

        virtual scalar_type get_c (
        ) const
        {
            return C;
        }

        virtual long get_num_dimensions (
        ) const
        {
            return dims;
        }

        // Tells oca when to stop: after max_iterations, or once the risk gap
        // drops below eps.  Optionally prints per-iteration progress.
        virtual bool optimization_status (
            scalar_type current_objective_value,
            scalar_type current_error_gap,
            scalar_type current_risk_value,
            scalar_type current_risk_gap,
            unsigned long num_cutting_planes,
            unsigned long num_iterations
        ) const
        {
            if (be_verbose)
            {
                using namespace std;
                cout << "objective: " << current_objective_value << endl;
                cout << "objective gap: " << current_error_gap << endl;
                cout << "risk: " << current_risk_value << endl;
                cout << "risk gap: " << current_risk_gap << endl;
                cout << "num planes: " << num_cutting_planes << endl;
                cout << "iter: " << num_iterations << endl;
                cout << endl;
            }

            if (num_iterations >= max_iterations)
                return true;

            if (current_risk_gap < eps)
                return true;

            return false;
        }

        // The ranking risk can never be negative, so report 0 as a lower bound.
        virtual bool risk_has_lower_bound (
            scalar_type& lower_bound
        ) const
        {
            lower_bound = 0;
            return true;
        }

        // Evaluates the convex risk (average margin-violating rank inversion
        // count) and its subgradient at the current solution point w.
        virtual void get_risk (
            matrix_type& w,
            scalar_type& risk,
            matrix_type& subgradient
        ) const
        {
            subgradient.set_size(w.size(),1);
            subgradient = 0;
            risk = 0;

            // Note that we want the risk value to be in terms of the fraction of overall
            // rank flips.  So a risk of 0.1 would mean that rank flips happen < 10% of the
            // time.


            std::vector<double> rel_scores;
            std::vector<double> nonrel_scores;
            std::vector<unsigned long> rel_counts;
            std::vector<unsigned long> nonrel_counts;

            unsigned long total_pairs = 0;

            // loop over all the samples and compute the risk and its subgradient at the current solution point w
            for (unsigned long i = 0; i < samples.size(); ++i)
            {
                rel_scores.resize(samples[i].relevant.size());
                nonrel_scores.resize(samples[i].nonrelevant.size());

                for (unsigned long k = 0; k < rel_scores.size(); ++k)
                    rel_scores[k] = dot(samples[i].relevant[k], w);

                // The +1 shifts the nonrelevant scores so that inversions are
                // counted with a unit margin (the hinge loss of the ranking SVM).
                for (unsigned long k = 0; k < nonrel_scores.size(); ++k)
                    nonrel_scores[k] = dot(samples[i].nonrelevant[k], w) + 1;

                count_ranking_inversions(rel_scores, nonrel_scores, rel_counts, nonrel_counts);

                total_pairs += rel_scores.size()*nonrel_scores.size();

                // Each inverted pair contributes (nonrel_score - rel_score) to
                // the risk, so relevant samples enter with a negative sign.
                for (unsigned long k = 0; k < rel_counts.size(); ++k)
                {
                    if (rel_counts[k] != 0)
                    {
                        risk -= rel_counts[k]*rel_scores[k];
                        subtract_from(subgradient, samples[i].relevant[k], rel_counts[k]);
                    }
                }

                for (unsigned long k = 0; k < nonrel_counts.size(); ++k)
                {
                    if (nonrel_counts[k] != 0)
                    {
                        risk += nonrel_counts[k]*nonrel_scores[k];
                        add_to(subgradient, samples[i].nonrelevant[k], nonrel_counts[k]);
                    }
                }

            }

            // Normalize so risk is the fraction of ranking pairs violated.
            const scalar_type scale = 1.0/total_pairs;

            risk *= scale;
            subgradient = scale*subgradient;
        }

    private:

    // -----------------------------------------------------
    // -----------------------------------------------------


        const std::vector<ranking_pair<sample_type> >& samples;
        const scalar_type C;

        const bool be_verbose;
        const scalar_type eps;
        const unsigned long max_iterations;
        const unsigned long dims;
    };

// ----------------------------------------------------------------------------------------

    // Convenience factory so the sample/scalar types are deduced at the call site.
    template <
        typename matrix_type,
        typename sample_type,
        typename scalar_type
        >
    oca_problem_ranking_svm<matrix_type, sample_type> make_oca_problem_ranking_svm (
        const scalar_type C,
        const std::vector<ranking_pair<sample_type> >& samples,
        const bool be_verbose,
        const scalar_type eps,
        const unsigned long max_iterations,
        const unsigned long dims
    )
    {
        return oca_problem_ranking_svm<matrix_type, sample_type>(
            C, samples, be_verbose, eps, max_iterations, dims);
    }

// ----------------------------------------------------------------------------------------

    /*
        Linear ranking SVM trainer (Joachims' Ranking SVM) built on the oca
        cutting-plane optimizer.  See svm_rank_trainer_abstract.h for the
        formal contract of every member.
    */
    template <
        typename K
        >
    class svm_rank_trainer
    {

    public:
        typedef K kernel_type;
        typedef typename kernel_type::scalar_type scalar_type;
        typedef typename kernel_type::sample_type sample_type;
        typedef typename kernel_type::mem_manager_type mem_manager_type;
        typedef decision_function<kernel_type> trained_function_type;

        // You are getting a compiler error on this line because you supplied a non-linear kernel
        // to the svm_rank_trainer object.  You have to use one of the linear kernels with this
        // trainer.
        COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value ||
                             is_same_type<K, sparse_linear_kernel<sample_type> >::value ));

        svm_rank_trainer (
        )
        {
            C = 1;
            verbose = false;
            eps = 0.001;
            max_iterations = 10000;
            learn_nonnegative_weights = false;
            last_weight_1 = false;
        }

        explicit svm_rank_trainer (
            const scalar_type& C_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C_ > 0,
                "\t svm_rank_trainer::svm_rank_trainer()"
                << "\n\t C_ must be greater than 0"
                << "\n\t C_: " << C_
                << "\n\t this: " << this
                );

            C = C_;
            verbose = false;
            eps = 0.001;
            max_iterations = 10000;
            learn_nonnegative_weights = false;
            last_weight_1 = false;
        }

        // Sets the oca stopping tolerance on the risk gap.
        void set_epsilon (
            scalar_type eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\t void svm_rank_trainer::set_epsilon()"
                << "\n\t eps_ must be greater than 0"
                << "\n\t eps_: " << eps_
                << "\n\t this: " << this
                );

            eps = eps_;
        }

        const scalar_type get_epsilon (
        ) const { return eps; }

        unsigned long get_max_iterations (
        ) const { return max_iterations; }

        void set_max_iterations (
            unsigned long max_iter
        )
        {
            max_iterations = max_iter;
        }

        void be_verbose (
        )
        {
            verbose = true;
        }

        void be_quiet (
        )
        {
            verbose = false;
        }

        bool forces_last_weight_to_1 (
        ) const
        {
            return last_weight_1;
        }

        // Forcing the last weight to 1 is incompatible with using a prior, so
        // enabling it discards any prior that was set.
        void force_last_weight_to_1 (
            bool should_last_weight_be_1
        )
        {
            last_weight_1 = should_last_weight_be_1;
            if (last_weight_1)
                prior.set_size(0);
        }

        void set_oca (
            const oca& item
        )
        {
            solver = item;
        }

        const oca get_oca (
        ) const
        {
            return solver;
        }

        // Linear kernels are stateless, so just return a default constructed one.
        const kernel_type get_kernel (
        ) const
        {
            return kernel_type();
        }

        bool learns_nonnegative_weights (
        ) const { return learn_nonnegative_weights; }

        // Nonnegativity constraints are incompatible with using a prior, so
        // enabling them discards any prior that was set.
        void set_learns_nonnegative_weights (
            bool value
        )
        {
            learn_nonnegative_weights = value;
            if (learn_nonnegative_weights)
                prior.set_size(0);
        }

        // Uses a previously trained decision function as a regularization
        // target; disables the options that conflict with it.
        void set_prior (
            const trained_function_type& prior_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(prior_.basis_vectors.size() == 1 &&
                        prior_.alpha(0) == 1,
                "\t void svm_rank_trainer::set_prior()"
                << "\n\t The supplied prior could not have been created by this object's train() method."
                << "\n\t prior_.basis_vectors.size(): " << prior_.basis_vectors.size()
                << "\n\t prior_.alpha(0): " << prior_.alpha(0)
                << "\n\t this: " << this
                );

            prior = sparse_to_dense(prior_.basis_vectors(0));
            learn_nonnegative_weights = false;
            last_weight_1 = false;
        }

        bool has_prior (
        ) const
        {
            return prior.size() != 0;
        }

        void set_c (
            scalar_type C_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C_ > 0,
                "\t void svm_rank_trainer::set_c()"
                << "\n\t C_ must be greater than 0"
                << "\n\t C_: " << C_
                << "\n\t this: " << this
                );

            C = C_;
        }

        const scalar_type get_c (
        ) const
        {
            return C;
        }

        // Trains the ranking SVM on the given ranking pairs and returns the
        // learned (linear) decision function.
        const decision_function<kernel_type> train (
            const std::vector<ranking_pair<sample_type> >& samples
        ) const
        {
            // make sure requires clause is not broken
            DLIB_CASSERT(is_ranking_problem(samples) == true,
                "\t decision_function svm_rank_trainer::train(samples)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t samples.size(): " << samples.size()
                << "\n\t is_ranking_problem(samples): " << is_ranking_problem(samples)
                );


            typedef matrix<scalar_type,0,1> w_type;
            w_type w;

            const unsigned long num_dims = max_index_plus_one(samples);

            unsigned long num_nonnegative = 0;
            if (learn_nonnegative_weights)
            {
                num_nonnegative = num_dims;
            }

            // sentinel value means "no forced weight" to the oca solver
            unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max();
            if (last_weight_1)
            {
                force_weight_1_idx = num_dims-1;
            }

            if (has_prior())
            {
                if (is_matrix<sample_type>::value)
                {
                    // make sure requires clause is not broken
                    DLIB_CASSERT(num_dims == (unsigned long)prior.size(),
                        "\t decision_function svm_rank_trainer::train(samples)"
                        << "\n\t The dimension of the training vectors must match the dimension of\n"
                        << "\n\t those used to create the prior."
                        << "\n\t num_dims: " << num_dims
                        << "\n\t prior.size(): " << prior.size()
                    );
                }
                const unsigned long dims = std::max(num_dims, (unsigned long)prior.size());
                // In the case of sparse sample vectors, it is possible that the input
                // vector dimensionality is larger than the prior vector dimensionality.
                // We need to check for this case and pad prior with zeros if it is the
                // case.
                if ((unsigned long)prior.size() < dims)
                {
                    matrix<scalar_type,0,1> prior_temp = join_cols(prior, zeros_matrix<scalar_type>(dims-prior.size(),1));
                    solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, dims),
                        w,
                        prior_temp);
                }
                else
                {
                    solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, dims),
                        w,
                        prior);
                }

            }
            else
            {
                solver( make_oca_problem_ranking_svm<w_type>(C, samples, verbose, eps, max_iterations, num_dims),
                    w,
                    num_nonnegative,
                    force_weight_1_idx);
            }


            // put the solution into a decision function and then return it
            decision_function<kernel_type> df;
            df.b = 0;
            df.basis_vectors.set_size(1);
            // Copy the results into the output basis vector.  The output vector might be a
            // sparse vector container so we need to use this special kind of copy to
            // handle that case.
            assign(df.basis_vectors(0), matrix_cast<scalar_type>(w));
            df.alpha.set_size(1);
            df.alpha(0) = 1;

            return df;
        }

        // Convenience overload for a single ranking pair.
        const decision_function<kernel_type> train (
            const ranking_pair<sample_type>& sample
        ) const
        {
            return train(std::vector<ranking_pair<sample_type> >(1, sample));
        }

    private:

        scalar_type C;                      // SVM regularization parameter
        oca solver;                         // cutting plane optimizer
        scalar_type eps;                    // stopping tolerance on the risk gap
        bool verbose;
        unsigned long max_iterations;
        bool learn_nonnegative_weights;
        bool last_weight_1;
        matrix<scalar_type,0,1> prior;      // empty unless set_prior() was called
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SVM_RANK_TrAINER_Hh_

// ==== diff: new file ml/dlib/dlib/svm/svm_rank_trainer_abstract.h (index 000000000..4658d950f, @@ -0,0 +1,298 @@) ====
// Copyright (C) 2012  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_SVM_RANK_TrAINER_ABSTRACT_Hh_
#ifdef DLIB_SVM_RANK_TrAINER_ABSTRACT_Hh_

#include "ranking_tools_abstract.h"
#include "sparse_vector_abstract.h"
#include "function_abstract.h"
#include "kernel_abstract.h"
#include "../algs.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename K
        >
    class svm_rank_trainer
    {
        /*!
            REQUIREMENTS ON K
                Is either linear_kernel or sparse_linear_kernel.

            WHAT THIS OBJECT REPRESENTS
                This object represents a tool for training a ranking support vector machine
                using linear kernels.  In particular, this object is a tool for training
                the Ranking SVM described in the paper:
                    Optimizing Search Engines using Clickthrough Data by Thorsten Joachims

                Note that we normalize the C parameter by multiplying it by 1/(number of ranking pairs).
+ Therefore, to make an exact comparison between this object and Equation 12 + in the paper you must multiply C by the appropriate normalizing quantity. + + Finally, note that the implementation of this object is done using the oca + optimizer and count_ranking_inversions() method. This means that it runs + in O(n*log(n)) time, making it suitable for use with large datasets. + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svm_rank_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used to train a + ranking support vector machine. + - #get_oca() == oca() (i.e. an instance of oca with default parameters) + - #get_c() == 1 + - #get_epsilon() == 0.001 + - this object will not be verbose unless be_verbose() is called + - #get_max_iterations() == 10000 + - #learns_nonnegative_weights() == false + - #forces_last_weight_to_1() == false + - #has_prior() == false + !*/ + + explicit svm_rank_trainer ( + const scalar_type& C + ); + /*! + requires + - C > 0 + ensures + - This object is properly initialized and ready to be used to train a + ranking support vector machine. + - #get_oca() == oca() (i.e. an instance of oca with default parameters) + - #get_c() == C + - #get_epsilon() == 0.001 + - this object will not be verbose unless be_verbose() is called + - #get_max_iterations() == 10000 + - #learns_nonnegative_weights() == false + - #forces_last_weight_to_1() == false + - #has_prior() == false + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ); + /*! + ensures + - returns the error epsilon that determines when training should stop. 
+ Smaller values may result in a more accurate solution but take longer to + train. You can think of this epsilon value as saying "solve the + optimization problem until the average ranking accuracy is within epsilon + of its optimal value". Here we mean "ranking accuracy" in the same sense + used by test_ranking_function() and cross_validate_ranking_trainer(). + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! + ensures + - #get_max_iterations() == max_iter + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a user can + observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + bool forces_last_weight_to_1 ( + ) const; + /*! + ensures + - returns true if this trainer has the constraint that the last weight in + the learned parameter vector must be 1. This is the weight corresponding + to the feature in the training vectors with the highest dimension. + !*/ + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ); + /*! + ensures + - #forces_last_weight_to_1() == should_last_weight_be_1 + - if (should_last_weight_be_1 == true) then + - #has_prior() == false + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the SVM problem. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object. Since the + linear kernels don't have any parameters this function just returns + kernel_type() + !*/ + + bool learns_nonnegative_weights ( + ) const; + /*! 
+ ensures + - The output of training is a weight vector that defines the behavior of + the resulting decision function. That is, the decision function simply + takes the dot product between the learned weight vector and a test sample + and returns the result. Therefore, if learns_nonnegative_weights() == true + then the resulting learned weight vector will always have non-negative + entries. + !*/ + + void set_learns_nonnegative_weights ( + bool value + ); + /*! + ensures + - #learns_nonnegative_weights() == value + - if (value == true) then + - #has_prior() == false + !*/ + + void set_prior ( + const trained_function_type& prior + ); + /*! + requires + - prior == a function produced by a call to this class's train() function. + Therefore, it must be the case that: + - prior.basis_vectors.size() == 1 + - prior.alpha(0) == 1 + ensures + - Subsequent calls to train() will try to learn a function similar to the + given prior. + - #has_prior() == true + - #learns_nonnegative_weights() == false + - #forces_last_weight_to_1() == false + !*/ + + bool has_prior ( + ) const + /*! + ensures + - returns true if a prior has been set and false otherwise. Having a prior + set means that you have called set_prior() and supplied a previously + trained function as a reference. In this case, any call to train() will + try to learn a function that matches the behavior of the prior as close + as possible but also fits the supplied training data. In more technical + detail, having a prior means we replace the ||w||^2 regularizer with one + of the form ||w-prior||^2 where w is the set of parameters for a learned + function. + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c() == C + !*/ + + const scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. 
It is the parameter that + determines the trade off between trying to fit the training data exactly + or allowing more errors but hopefully improving the generalization of the + resulting classifier. Larger values encourage exact fitting while + smaller values of C may encourage better generalization. + !*/ + + const decision_function<kernel_type> train ( + const std::vector<ranking_pair<sample_type> >& samples + ) const; + /*! + requires + - is_ranking_problem(samples) == true + - if (has_prior()) then + - The vectors in samples must have the same dimensionality as the + vectors used to train the prior given to set_prior(). + ensures + - trains a ranking support vector classifier given the training samples. + - returns a decision function F with the following properties: + - F.alpha.size() == 1 + - F.basis_vectors.size() == 1 + - F.alpha(0) == 1 + - Given two vectors, A and B, then A is predicted to come before B + in the learned ranking if and only if F(A) > F(B). + - Based on the contents of samples, F will attempt to give relevant + vectors higher scores than non-relevant vectors. + !*/ + + const decision_function<kernel_type> train ( + const ranking_pair<sample_type>& sample + ) const; + /*! + requires + - is_ranking_problem(std::vector<ranking_pair<sample_type> >(1, sample)) == true + - if (has_prior()) then + - The vectors in samples must have the same dimensionality as the + vectors used to train the prior given to set_prior(). + ensures + - This is just a convenience routine for calling the above train() + function. That is, it just copies sample into a std::vector object and + invokes the above train() method. 
This means that calling this function + is equivalent to invoking: + return train(std::vector<ranking_pair<sample_type> >(1, sample)); + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVM_RANK_TrAINER_ABSTRACT_Hh_ + diff --git a/ml/dlib/dlib/svm/svm_threaded.h b/ml/dlib/dlib/svm/svm_threaded.h new file mode 100644 index 000000000..37927456b --- /dev/null +++ b/ml/dlib/dlib/svm/svm_threaded.h @@ -0,0 +1,253 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVm_THREADED_ +#define DLIB_SVm_THREADED_ + +#include <cmath> +#include <iostream> +#include <limits> +#include <sstream> +#include <vector> + +#include "svm_threaded_abstract.h" +#include "svm.h" +#include "../matrix.h" +#include "../algs.h" +#include "../serialize.h" +#include "function.h" +#include "kernel.h" +#include "../threads.h" +#include "../pipe.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + namespace cvtti_helpers + { + template <typename trainer_type, typename in_sample_vector_type> + struct job + { + typedef typename trainer_type::scalar_type scalar_type; + typedef typename trainer_type::sample_type sample_type; + typedef typename trainer_type::mem_manager_type mem_manager_type; + typedef matrix<sample_type,0,1,mem_manager_type> sample_vector_type; + typedef matrix<scalar_type,0,1,mem_manager_type> scalar_vector_type; + + job() : x(0) {} + + trainer_type trainer; + matrix<long,0,1> x_test, x_train; + scalar_vector_type y_test, y_train; + const in_sample_vector_type* x; + }; + + struct task + { + template < + typename trainer_type, + typename mem_manager_type, + typename in_sample_vector_type + > + void operator()( + job<trainer_type,in_sample_vector_type>& j, + matrix<double,1,2,mem_manager_type>& result + ) + { + try + { + result = 
test_binary_decision_function(j.trainer.train(rowm(*j.x,j.x_train), j.y_train), rowm(*j.x,j.x_test), j.y_test);
+
+                    // Do this just to make j release its memory since people might run threaded cross validation
+                    // on very large datasets.  Every bit of freed memory helps out.
+                    j = job<trainer_type,in_sample_vector_type>();
+                }
+                catch (invalid_nu_error&)
+                {
+                    // If this is a svm_nu_trainer then we might get this exception if the nu is
+                    // invalid.  In this case just return a cross validation score of 0.
+                    result = 0;
+                }
+                catch (std::bad_alloc&)
+                {
+                    std::cerr << "\nstd::bad_alloc thrown while running cross_validate_trainer_threaded().  Not enough memory.\n" << std::endl;
+                    throw;
+                }
+            }
+        };
+    }
+
+    template <
+        typename trainer_type,
+        typename in_sample_vector_type,
+        typename in_scalar_vector_type
+        >
+    const matrix<double, 1, 2, typename trainer_type::mem_manager_type>
+    cross_validate_trainer_threaded_impl (
+        const trainer_type& trainer,
+        const in_sample_vector_type& x,
+        const in_scalar_vector_type& y,
+        const long folds,
+        const long num_threads
+    )
+    {
+        using namespace dlib::cvtti_helpers;
+        typedef typename trainer_type::mem_manager_type mem_manager_type;
+
+        // make sure requires clause is not broken
+        DLIB_ASSERT(is_binary_classification_problem(x,y) == true &&
+                    1 < folds && folds <= std::min(sum(y>0),sum(y<0)) &&
+                    num_threads > 0,
+            "\tmatrix cross_validate_trainer()"
+            << "\n\t invalid inputs were given to this function"
+            << "\n\t std::min(sum(y>0),sum(y<0)): " << std::min(sum(y>0),sum(y<0))
+            << "\n\t folds: " << folds
+            << "\n\t num_threads: " << num_threads
+            << "\n\t is_binary_classification_problem(x,y): " << ((is_binary_classification_problem(x,y))?
"true":"false") + ); + + + task mytask; + thread_pool tp(num_threads); + + + // count the number of positive and negative examples + long num_pos = 0; + long num_neg = 0; + for (long r = 0; r < y.nr(); ++r) + { + if (y(r) == +1.0) + ++num_pos; + else + ++num_neg; + } + + // figure out how many positive and negative examples we will have in each fold + const long num_pos_test_samples = num_pos/folds; + const long num_pos_train_samples = num_pos - num_pos_test_samples; + const long num_neg_test_samples = num_neg/folds; + const long num_neg_train_samples = num_neg - num_neg_test_samples; + + + long pos_idx = 0; + long neg_idx = 0; + + + + std::vector<future<job<trainer_type,in_sample_vector_type> > > jobs(folds); + std::vector<future<matrix<double, 1, 2, mem_manager_type> > > results(folds); + + + for (long i = 0; i < folds; ++i) + { + job<trainer_type,in_sample_vector_type>& j = jobs[i].get(); + + j.x = &x; + j.x_test.set_size (num_pos_test_samples + num_neg_test_samples); + j.y_test.set_size (num_pos_test_samples + num_neg_test_samples); + j.x_train.set_size(num_pos_train_samples + num_neg_train_samples); + j.y_train.set_size(num_pos_train_samples + num_neg_train_samples); + j.trainer = trainer; + + long cur = 0; + + // load up our positive test samples + while (cur < num_pos_test_samples) + { + if (y(pos_idx) == +1.0) + { + j.x_test(cur) = pos_idx; + j.y_test(cur) = +1.0; + ++cur; + } + pos_idx = (pos_idx+1)%x.nr(); + } + + // load up our negative test samples + while (cur < j.x_test.nr()) + { + if (y(neg_idx) == -1.0) + { + j.x_test(cur) = neg_idx; + j.y_test(cur) = -1.0; + ++cur; + } + neg_idx = (neg_idx+1)%x.nr(); + } + + // load the training data from the data following whatever we loaded + // as the testing data + long train_pos_idx = pos_idx; + long train_neg_idx = neg_idx; + cur = 0; + + // load up our positive train samples + while (cur < num_pos_train_samples) + { + if (y(train_pos_idx) == +1.0) + { + j.x_train(cur) = train_pos_idx; + j.y_train(cur) = 
+1.0; + ++cur; + } + train_pos_idx = (train_pos_idx+1)%x.nr(); + } + + // load up our negative train samples + while (cur < j.x_train.nr()) + { + if (y(train_neg_idx) == -1.0) + { + j.x_train(cur) = train_neg_idx; + j.y_train(cur) = -1.0; + ++cur; + } + train_neg_idx = (train_neg_idx+1)%x.nr(); + } + + // finally spawn a task to process this job + tp.add_task(mytask, jobs[i], results[i]); + + } // for (long i = 0; i < folds; ++i) + + matrix<double, 1, 2, mem_manager_type> res; + set_all_elements(res,0); + + // now compute the total results + for (long i = 0; i < folds; ++i) + { + res += results[i].get(); + } + + return res/(double)folds; + } + + template < + typename trainer_type, + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const matrix<double, 1, 2, typename trainer_type::mem_manager_type> + cross_validate_trainer_threaded ( + const trainer_type& trainer, + const in_sample_vector_type& x, + const in_scalar_vector_type& y, + const long folds, + const long num_threads + ) + { + return cross_validate_trainer_threaded_impl(trainer, + mat(x), + mat(y), + folds, + num_threads); + } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_THREADED_ + + diff --git a/ml/dlib/dlib/svm/svm_threaded_abstract.h b/ml/dlib/dlib/svm/svm_threaded_abstract.h new file mode 100644 index 000000000..f9973fb5c --- /dev/null +++ b/ml/dlib/dlib/svm/svm_threaded_abstract.h @@ -0,0 +1,62 @@ +// Copyright (C) 2008 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#undef DLIB_SVm_THREADED_ABSTRACT_
+#ifdef DLIB_SVm_THREADED_ABSTRACT_
+
+#include "../matrix/matrix_abstract.h"
+#include "../algs.h"
+#include "../svm.h"
+
+namespace dlib
+{
+
+// ----------------------------------------------------------------------------------------
+
+    template <
+        typename trainer_type,
+        typename in_sample_vector_type,
+        typename in_scalar_vector_type
+        >
+    const matrix<double, 1, 2, typename trainer_type::mem_manager_type>
+    cross_validate_trainer_threaded (
+        const trainer_type& trainer,
+        const in_sample_vector_type& x,
+        const in_scalar_vector_type& y,
+        const long folds,
+        const long num_threads
+    );
+    /*!
+        requires
+            - is_binary_classification_problem(x,y) == true
+            - 1 < folds <= std::min(sum(y>0),sum(y<0))
+              (i.e. there must be at least as many examples of each class as there are folds)
+            - trainer_type == some kind of trainer object (e.g. svm_nu_trainer)
+            - num_threads > 0
+            - It must be safe for multiple trainer objects to access the elements of x from
+              multiple threads at the same time.  Note that all trainers and kernels in
+              dlib are thread safe in this regard since they do not mutate the elements of x.
+        ensures
+            - performs k-fold cross validation by using the given trainer to solve the
+              given binary classification problem for the given number of folds.
+              Each fold is tested using the output of the trainer and the average
+              classification accuracy from all folds is returned.
+            - uses num_threads threads of execution in doing the cross validation.
+            - The accuracy is returned in a row vector, let us call it R.  Both
+              quantities in R are numbers between 0 and 1 which represent the fraction
+              of examples correctly classified.  R(0) is the fraction of +1 examples
+              correctly classified and R(1) is the fraction of -1 examples correctly
+              classified.
+            - The number of folds used is given by the folds argument.
+ throws + - any exceptions thrown by trainer.train() + - std::bad_alloc + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_THREADED_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/svr_linear_trainer.h b/ml/dlib/dlib/svm/svr_linear_trainer.h new file mode 100644 index 000000000..27ce5b52a --- /dev/null +++ b/ml/dlib/dlib/svm/svr_linear_trainer.h @@ -0,0 +1,424 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_SVR_LINEAR_TrAINER_Hh_ +#define DLIB_SVR_LINEAR_TrAINER_Hh_ + +#include "svr_linear_trainer_abstract.h" + +#include "../algs.h" +#include "../optimization.h" +#include "function.h" +#include "kernel.h" +#include "sparse_vector.h" +#include <iostream> + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type, + typename sample_type + > + class oca_problem_linear_svr : public oca_problem<matrix_type > + { + public: + /* + This class is used as part of the implementation of the svr_linear_trainer + defined towards the end of this file. 
+ */ + + typedef typename matrix_type::type scalar_type; + + oca_problem_linear_svr( + const scalar_type C_, + const std::vector<sample_type>& samples_, + const std::vector<scalar_type>& targets_, + const bool be_verbose_, + const scalar_type eps_, + const scalar_type eps_insensitivity_, + const unsigned long max_iter + ) : + samples(samples_), + targets(targets_), + C(C_), + be_verbose(be_verbose_), + eps(eps_), + eps_insensitivity(eps_insensitivity_), + max_iterations(max_iter) + { + } + + virtual scalar_type get_c ( + ) const + { + return C; + } + + virtual long get_num_dimensions ( + ) const + { + // plus one for the bias term + return max_index_plus_one(samples) + 1; + } + + virtual bool optimization_status ( + scalar_type current_objective_value, + scalar_type current_error_gap, + scalar_type current_risk_value, + scalar_type current_risk_gap, + unsigned long num_cutting_planes, + unsigned long num_iterations + ) const + { + current_risk_value /= samples.size(); + current_risk_gap /= samples.size(); + if (be_verbose) + { + using namespace std; + cout << "objective: " << current_objective_value << endl; + cout << "objective gap: " << current_error_gap << endl; + cout << "risk: " << current_risk_value << endl; + cout << "risk gap: " << current_risk_gap << endl; + cout << "num planes: " << num_cutting_planes << endl; + cout << "iter: " << num_iterations << endl; + cout << endl; + } + + if (num_iterations >= max_iterations) + return true; + + if (current_risk_gap < eps*eps_insensitivity) + return true; + + return false; + } + + virtual bool risk_has_lower_bound ( + scalar_type& lower_bound + ) const + { + lower_bound = 0; + return true; + } + + virtual void get_risk ( + matrix_type& w, + scalar_type& risk, + matrix_type& subgradient + ) const + { + subgradient.set_size(w.size(),1); + subgradient = 0; + risk = 0; + + // loop over all the samples and compute the risk and its subgradient at the current solution point w + for (unsigned long i = 0; i < samples.size(); 
++i) + { + const long w_size_m1 = w.size()-1; + const scalar_type prediction = dot(colm(w,0,w_size_m1), samples[i]) - w(w_size_m1); + + if (std::abs(prediction - targets[i]) > eps_insensitivity) + { + if (prediction < targets[i]) + { + subtract_from(subgradient, samples[i]); + subgradient(w_size_m1) += 1; + } + else + { + add_to(subgradient, samples[i]); + subgradient(w_size_m1) -= 1; + } + + risk += std::abs(prediction - targets[i]) - eps_insensitivity; + } + } + } + + private: + + // ----------------------------------------------------- + // ----------------------------------------------------- + + + const std::vector<sample_type>& samples; + const std::vector<scalar_type>& targets; + const scalar_type C; + + const bool be_verbose; + const scalar_type eps; + const scalar_type eps_insensitivity; + const unsigned long max_iterations; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename matrix_type, + typename sample_type, + typename scalar_type + > + oca_problem_linear_svr<matrix_type, sample_type> make_oca_problem_linear_svr ( + const scalar_type C, + const std::vector<sample_type>& samples, + const std::vector<scalar_type>& targets, + const bool be_verbose, + const scalar_type eps, + const scalar_type eps_insensitivity, + const unsigned long max_iterations + ) + { + return oca_problem_linear_svr<matrix_type, sample_type>( + C, samples, targets, be_verbose, eps, eps_insensitivity, max_iterations); + } + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svr_linear_trainer + { + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + // You are getting a compiler error on this line because 
you supplied a non-linear kernel + // to the svr_linear_trainer object. You have to use one of the linear kernels with this + // trainer. + COMPILE_TIME_ASSERT((is_same_type<K, linear_kernel<sample_type> >::value || + is_same_type<K, sparse_linear_kernel<sample_type> >::value )); + + svr_linear_trainer ( + ) + { + C = 1; + verbose = false; + eps = 0.01; + max_iterations = 10000; + learn_nonnegative_weights = false; + last_weight_1 = false; + eps_insensitivity = 0.1; + } + + explicit svr_linear_trainer ( + const scalar_type& C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t svr_linear_trainer::svr_linear_trainer()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + verbose = false; + eps = 0.01; + max_iterations = 10000; + learn_nonnegative_weights = false; + last_weight_1 = false; + eps_insensitivity = 0.1; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\t void svr_linear_trainer::set_epsilon()" + << "\n\t eps_ must be greater than 0" + << "\n\t eps_: " << eps_ + << "\n\t this: " << this + ); + + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const { return eps; } + + void set_epsilon_insensitivity ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\tvoid svr_linear_trainer::set_epsilon_insensitivity(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps_: " << eps_ + ); + eps_insensitivity = eps_; + } + + const scalar_type get_epsilon_insensitivity ( + ) const + { + return eps_insensitivity; + } + + unsigned long get_max_iterations ( + ) const { return max_iterations; } + + void set_max_iterations ( + unsigned long max_iter + ) + { + max_iterations = max_iter; + } + + void be_verbose ( + ) + { + verbose = true; + } + + void be_quiet ( + ) + { + verbose = false; + } + + bool forces_last_weight_to_1 ( + ) const + { + 
return last_weight_1; + } + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ) + { + last_weight_1 = should_last_weight_be_1; + } + + void set_oca ( + const oca& item + ) + { + solver = item; + } + + const oca get_oca ( + ) const + { + return solver; + } + + const kernel_type get_kernel ( + ) const + { + return kernel_type(); + } + + bool learns_nonnegative_weights ( + ) const { return learn_nonnegative_weights; } + + void set_learns_nonnegative_weights ( + bool value + ) + { + learn_nonnegative_weights = value; + } + + void set_c ( + scalar_type C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void svr_linear_trainer::set_c()" + << "\n\t C_ must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + const scalar_type get_c ( + ) const + { + return C; + } + + const decision_function<kernel_type> train ( + const std::vector<sample_type>& samples, + const std::vector<scalar_type>& targets + ) const + { + // make sure requires clause is not broken + DLIB_CASSERT(is_learning_problem(samples, targets) == true, + "\t decision_function svr_linear_trainer::train(samples, targets)" + << "\n\t invalid inputs were given to this function" + << "\n\t samples.size(): " << samples.size() + << "\n\t targets.size(): " << targets.size() + << "\n\t is_learning_problem(samples,targets): " << is_learning_problem(samples,targets) + ); + + + typedef matrix<scalar_type,0,1> w_type; + w_type w; + + const unsigned long num_dims = max_index_plus_one(samples); + + unsigned long num_nonnegative = 0; + if (learn_nonnegative_weights) + { + num_nonnegative = num_dims; + } + + unsigned long force_weight_1_idx = std::numeric_limits<unsigned long>::max(); + if (last_weight_1) + { + force_weight_1_idx = num_dims-1; + } + + solver( make_oca_problem_linear_svr<w_type>(C, samples, targets, verbose, eps, eps_insensitivity, max_iterations), + w, + num_nonnegative, + force_weight_1_idx); + + + // put the solution into 
a decision function and then return it + decision_function<kernel_type> df; + df.b = static_cast<scalar_type>(w(w.size()-1)); + df.basis_vectors.set_size(1); + // Copy the plane normal into the output basis vector. The output vector might be a + // sparse vector container so we need to use this special kind of copy to handle that case. + // As an aside, the reason for using max_index_plus_one() and not just w.size()-1 is because + // doing it this way avoids an inane warning from gcc that can occur in some cases. + const long out_size = max_index_plus_one(samples); + assign(df.basis_vectors(0), matrix_cast<scalar_type>(colm(w, 0, out_size))); + df.alpha.set_size(1); + df.alpha(0) = 1; + + return df; + } + + private: + + scalar_type C; + oca solver; + scalar_type eps; + bool verbose; + unsigned long max_iterations; + bool learn_nonnegative_weights; + bool last_weight_1; + scalar_type eps_insensitivity; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVR_LINEAR_TrAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/svr_linear_trainer_abstract.h b/ml/dlib/dlib/svm/svr_linear_trainer_abstract.h new file mode 100644 index 000000000..c74310f06 --- /dev/null +++ b/ml/dlib/dlib/svm/svr_linear_trainer_abstract.h @@ -0,0 +1,269 @@ +// Copyright (C) 2013 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_Hh_ +#ifdef DLIB_SVR_LINEAR_TrAINER_ABSTRACT_Hh_ + +#include "sparse_vector_abstract.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "../algs.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svr_linear_trainer + { + /*! + REQUIREMENTS ON K + Is either linear_kernel or sparse_linear_kernel. 
+
+        WHAT THIS OBJECT REPRESENTS
+            This object implements a trainer for performing epsilon-insensitive support
+            vector regression.  It uses the oca optimizer so it is very efficient at
+            solving this problem when linear kernels are used, making it suitable for
+            use with large datasets.
+
+            For an introduction to support vector regression see the following paper:
+                A Tutorial on Support Vector Regression by Alex J. Smola and Bernhard Scholkopf.
+            Note that this object solves the version of support vector regression
+            defined by equation (3) in the paper, except that we incorporate the bias
+            term into the w vector by appending a 1 to the end of each sample.
+        !*/
+
+    public:
+        typedef K kernel_type;
+        typedef typename kernel_type::scalar_type scalar_type;
+        typedef typename kernel_type::sample_type sample_type;
+        typedef typename kernel_type::mem_manager_type mem_manager_type;
+        typedef decision_function<kernel_type> trained_function_type;
+
+        svr_linear_trainer (
+        );
+        /*!
+            ensures
+                - This object is properly initialized and ready to be used to train a
+                  regression support vector machine.
+                - #get_oca() == oca() (i.e. an instance of oca with default parameters)
+                - #get_c() == 1
+                - #get_epsilon() == 0.01
+                - #get_epsilon_insensitivity() == 0.1
+                - This object will not be verbose unless be_verbose() is called
+                - #get_max_iterations() == 10000
+                - #learns_nonnegative_weights() == false
+                - #forces_last_weight_to_1() == false
+        !*/
+
+        explicit svr_linear_trainer (
+            const scalar_type& C
+        );
+        /*!
+            requires
+                - C > 0
+            ensures
+                - This object is properly initialized and ready to be used to train a
+                  regression support vector machine.
+                - #get_oca() == oca() (i.e.
an instance of oca with default parameters) + - #get_c() == C + - #get_epsilon() == 0.01 + - #get_epsilon_insensitivity() = 0.1 + - This object will not be verbose unless be_verbose() is called + - #get_max_iterations() == 10000 + - #learns_nonnegative_weights() == false + - #forces_last_weight_to_1() == false + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Smaller values may result in a more accurate solution but take longer to + train. You can think of this epsilon value as saying "solve the + optimization problem until the average regression error is within epsilon + of its optimal value". See get_epsilon_insensitivity() below for a + definition of "regression error". + !*/ + + void set_epsilon_insensitivity ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon_insensitivity() == eps + !*/ + + const scalar_type get_epsilon_insensitivity ( + ) const; + /*! + ensures + - This object tries to find a function which minimizes the regression error + on a training set. This error is measured in the following way: + - if (abs(predicted_value - true_labeled_value) < eps) then + - The error is 0. That is, any function which gets within eps of + the correct output is good enough. + - else + - The error grows linearly once it gets bigger than eps. + + So epsilon-insensitive regression means we do regression but stop trying + to fit a data point once it is "close enough". This function returns + that eps value which controls what we mean by "close enough". + !*/ + + unsigned long get_max_iterations ( + ) const; + /*! + ensures + - returns the maximum number of iterations the SVM optimizer is allowed to + run before it is required to stop and return a result. + !*/ + + void set_max_iterations ( + unsigned long max_iter + ); + /*! 
+ ensures + - #get_max_iterations() == max_iter + !*/ + + void be_verbose ( + ); + /*! + ensures + - This object will print status messages to standard out so that a user can + observe the progress of the algorithm. + !*/ + + void be_quiet ( + ); + /*! + ensures + - this object will not print anything to standard out + !*/ + + bool forces_last_weight_to_1 ( + ) const; + /*! + ensures + - returns true if this trainer has the constraint that the last weight in + the learned parameter vector must be 1. This is the weight corresponding + to the feature in the training vectors with the highest dimension. + - Forcing the last weight to 1 also disables the bias and therefore the b + field of the learned decision_function will be 0 when forces_last_weight_to_1() == true. + !*/ + + void force_last_weight_to_1 ( + bool should_last_weight_be_1 + ); + /*! + ensures + - #forces_last_weight_to_1() == should_last_weight_be_1 + !*/ + + void set_oca ( + const oca& item + ); + /*! + ensures + - #get_oca() == item + !*/ + + const oca get_oca ( + ) const; + /*! + ensures + - returns a copy of the optimizer used to solve the SVM problem. + !*/ + + const kernel_type get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object. Since the + linear kernels don't have any parameters this function just returns + kernel_type() + !*/ + + bool learns_nonnegative_weights ( + ) const; + /*! + ensures + - The output of training is a weight vector and a bias value. These two + things define the resulting decision function. That is, the decision + function simply takes the dot product between the learned weight vector + and a test sample, then subtracts the bias value. Therefore, if + learns_nonnegative_weights() == true then the resulting learned weight + vector will always have non-negative entries. The bias value may still + be negative though. + !*/ + + void set_learns_nonnegative_weights ( + bool value + ); + /*! 
+ ensures + - #learns_nonnegative_weights() == value + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c() == C + !*/ + + const scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVM regularization parameter. It is the parameter that + determines the trade off between trying to fit the training data exactly + or allowing more errors but hopefully improving the generalization of the + resulting classifier. Larger values encourage exact fitting while + smaller values of C may encourage better generalization. + !*/ + + const decision_function<kernel_type> train ( + const std::vector<sample_type>& samples, + const std::vector<scalar_type>& targets + ) const; + /*! + requires + - is_learning_problem(samples,targets) == true + ensures + - performs support vector regression given the training samples and targets. + - returns a decision_function F with the following properties: + - F(new_sample) == predicted target value for new_sample + - F.alpha.size() == 1 + - F.basis_vectors.size() == 1 + - F.alpha(0) == 1 + !*/ + + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVR_LINEAR_TrAINER_ABSTRACT_Hh_ + + diff --git a/ml/dlib/dlib/svm/svr_trainer.h b/ml/dlib/dlib/svm/svr_trainer.h new file mode 100644 index 000000000..bc6378a20 --- /dev/null +++ b/ml/dlib/dlib/svm/svr_trainer.h @@ -0,0 +1,393 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. 
+#ifndef DLIB_SVm_EPSILON_REGRESSION_TRAINER_Hh_ +#define DLIB_SVm_EPSILON_REGRESSION_TRAINER_Hh_ + + +#include "svr_trainer_abstract.h" +#include <cmath> +#include <limits> +#include "../matrix.h" +#include "../algs.h" + +#include "function.h" +#include "kernel.h" +#include "../optimization/optimization_solve_qp3_using_smo.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svr_trainer + { + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svr_trainer ( + ) : + C(1), + eps_insensitivity(0.1), + cache_size(200), + eps(0.001) + { + } + + void set_cache_size ( + long cache_size_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(cache_size_ > 0, + "\tvoid svr_trainer::set_cache_size(cache_size_)" + << "\n\t invalid inputs were given to this function" + << "\n\t cache_size: " << cache_size_ + ); + cache_size = cache_size_; + } + + long get_cache_size ( + ) const + { + return cache_size; + } + + void set_epsilon ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\tvoid svr_trainer::set_epsilon(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps_: " << eps_ + ); + eps = eps_; + } + + const scalar_type get_epsilon ( + ) const + { + return eps; + } + + void set_epsilon_insensitivity ( + scalar_type eps_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(eps_ > 0, + "\tvoid svr_trainer::set_epsilon_insensitivity(eps_)" + << "\n\t invalid inputs were given to this function" + << "\n\t eps_: " << eps_ + ); + eps_insensitivity = eps_; + } + + const scalar_type get_epsilon_insensitivity ( + ) const + { + return eps_insensitivity; + } + + 
void set_kernel ( + const kernel_type& k + ) + { + kernel_function = k; + } + + const kernel_type& get_kernel ( + ) const + { + return kernel_function; + } + + void set_c ( + scalar_type C_ + ) + { + // make sure requires clause is not broken + DLIB_ASSERT(C_ > 0, + "\t void svr_trainer::set_c()" + << "\n\t C must be greater than 0" + << "\n\t C_: " << C_ + << "\n\t this: " << this + ); + + C = C_; + } + + const scalar_type get_c ( + ) const + { + return C; + } + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + return do_train(mat(x), mat(y)); + } + + void swap ( + svr_trainer& item + ) + { + exchange(kernel_function, item.kernel_function); + exchange(C, item.C); + exchange(eps_insensitivity, item.eps_insensitivity); + exchange(cache_size, item.cache_size); + exchange(eps, item.eps); + } + + private: + + // ------------------------------------------------------------------------------------ + + template <typename M> + struct op_quad + { + explicit op_quad( + const M& m_ + ) : m(m_) {} + + const M& m; + + typedef typename M::type type; + typedef type const_ret_type; + const static long cost = M::cost + 2; + + inline const_ret_type apply ( long r, long c) const + { + if (r < m.nr()) + { + if (c < m.nc()) + { + return m(r,c); + } + else + { + return -m(r,c-m.nc()); + } + } + else + { + if (c < m.nc()) + { + return -m(r-m.nr(),c); + } + else + { + return m(r-m.nr(),c-m.nc()); + } + } + } + + const static long NR = 2*M::NR; + const static long NC = 2*M::NC; + typedef typename M::mem_manager_type mem_manager_type; + typedef typename M::layout_type layout_type; + + long nr () const { return 2*m.nr(); } + long nc () const { return 2*m.nc(); } + + template <typename U> bool aliases ( const matrix_exp<U>& item) const + { return m.aliases(item); } + template <typename U> bool destructively_aliases ( const 
matrix_exp<U>& item) const + { return m.aliases(item); } + }; + + template < + typename EXP + > + const matrix_op<op_quad<EXP> > make_quad ( + const matrix_exp<EXP>& m + ) const + /*! + ensures + - returns the following matrix: + m -m + -m m + - I.e. returns a matrix that is twice the size of m and just + contains copies of m and -m + !*/ + { + typedef op_quad<EXP> op; + return matrix_op<op>(op(m.ref())); + } + + // ------------------------------------------------------------------------------------ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> do_train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const + { + typedef typename K::scalar_type scalar_type; + typedef typename decision_function<K>::sample_vector_type sample_vector_type; + typedef typename decision_function<K>::scalar_vector_type scalar_vector_type; + + // make sure requires clause is not broken + DLIB_ASSERT(is_learning_problem(x,y) == true, + "\tdecision_function svr_trainer::train(x,y)" + << "\n\t invalid inputs were given to this function" + << "\n\t x.nr(): " << x.nr() + << "\n\t y.nr(): " << y.nr() + << "\n\t x.nc(): " << x.nc() + << "\n\t y.nc(): " << y.nc() + ); + + + scalar_vector_type alpha; + + solve_qp3_using_smo<scalar_vector_type> solver; + + solver(symmetric_matrix_cache<float>(make_quad(kernel_matrix(kernel_function,x)), cache_size), + uniform_matrix<scalar_type>(2*x.size(),1, eps_insensitivity) + join_cols(y,-y), + join_cols(uniform_matrix<scalar_type>(x.size(),1,1), uniform_matrix<scalar_type>(x.size(),1,-1)), + 0, + C, + C, + alpha, + eps); + + scalar_type b; + calculate_b(alpha,solver.get_gradient(),C,b); + + alpha = -rowm(alpha,range(0,x.size()-1)) + rowm(alpha,range(x.size(), alpha.size()-1)); + + // count the number of support vectors + const long sv_count = (long)sum(alpha != 0); + + scalar_vector_type sv_alpha; + sample_vector_type support_vectors; + + // size these column 
vectors so that they have an entry for each support vector + sv_alpha.set_size(sv_count); + support_vectors.set_size(sv_count); + + // load the support vectors and their alpha values into these new column matrices + long idx = 0; + for (long i = 0; i < alpha.nr(); ++i) + { + if (alpha(i) != 0) + { + sv_alpha(idx) = alpha(i); + support_vectors(idx) = x(i); + ++idx; + } + } + + // now return the decision function + return decision_function<K> (sv_alpha, -b, kernel_function, support_vectors); + } + + // ------------------------------------------------------------------------------------ + + template < + typename scalar_vector_type + > + void calculate_b( + const scalar_vector_type& alpha, + const scalar_vector_type& df, + const scalar_type& C, + scalar_type& b + ) const + { + using namespace std; + long num_free = 0; + scalar_type sum_free = 0; + + scalar_type upper_bound = -numeric_limits<scalar_type>::infinity(); + scalar_type lower_bound = numeric_limits<scalar_type>::infinity(); + + find_min_and_max(df, upper_bound, lower_bound); + + for(long i = 0; i < alpha.nr(); ++i) + { + if(i < alpha.nr()/2) + { + if(alpha(i) == C) + { + if (df(i) > upper_bound) + upper_bound = df(i); + } + else if(alpha(i) == 0) + { + if (df(i) < lower_bound) + lower_bound = df(i); + } + else + { + ++num_free; + sum_free += df(i); + } + } + else + { + if(alpha(i) == C) + { + if (-df(i) < lower_bound) + lower_bound = -df(i); + } + else if(alpha(i) == 0) + { + if (-df(i) > upper_bound) + upper_bound = -df(i); + } + else + { + ++num_free; + sum_free -= df(i); + } + } + } + + if(num_free > 0) + b = sum_free/num_free; + else + b = (upper_bound+lower_bound)/2; + } + + // ------------------------------------------------------------------------------------ + + + kernel_type kernel_function; + scalar_type C; + scalar_type eps_insensitivity; + long cache_size; + scalar_type eps; + }; // end of class svr_trainer + +// 
---------------------------------------------------------------------------------------- + + template <typename K> + void swap ( + svr_trainer<K>& a, + svr_trainer<K>& b + ) { a.swap(b); } + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_EPSILON_REGRESSION_TRAINER_Hh_ + diff --git a/ml/dlib/dlib/svm/svr_trainer_abstract.h b/ml/dlib/dlib/svm/svr_trainer_abstract.h new file mode 100644 index 000000000..c1dd5f1f3 --- /dev/null +++ b/ml/dlib/dlib/svm/svr_trainer_abstract.h @@ -0,0 +1,209 @@ +// Copyright (C) 2010 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_ +#ifdef DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_ + +#include <cmath> +#include <limits> +#include "../matrix/matrix_abstract.h" +#include "../algs.h" +#include "function_abstract.h" +#include "kernel_abstract.h" +#include "../optimization/optimization_solve_qp3_using_smo_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename K + > + class svr_trainer + { + /*! + REQUIREMENTS ON K + is a kernel function object as defined in dlib/svm/kernel_abstract.h + + WHAT THIS OBJECT REPRESENTS + This object implements a trainer for performing epsilon-insensitive support + vector regression. It is implemented using the SMO algorithm. + + The implementation of the eps-SVR training algorithm used by this object is based + on the following paper: + - Chih-Chung Chang and Chih-Jen Lin, LIBSVM : a library for support vector + machines, 2001. 
Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm + !*/ + + public: + typedef K kernel_type; + typedef typename kernel_type::scalar_type scalar_type; + typedef typename kernel_type::sample_type sample_type; + typedef typename kernel_type::mem_manager_type mem_manager_type; + typedef decision_function<kernel_type> trained_function_type; + + svr_trainer ( + ); + /*! + ensures + - This object is properly initialized and ready to be used + to train a support vector machine. + - #get_c() == 1 + - #get_epsilon_insensitivity() == 0.1 + - #get_cache_size() == 200 + - #get_epsilon() == 0.001 + !*/ + + void set_cache_size ( + long cache_size + ); + /*! + requires + - cache_size > 0 + ensures + - #get_cache_size() == cache_size + !*/ + + const long get_cache_size ( + ) const; + /*! + ensures + - returns the number of megabytes of cache this object will use + when it performs training via the this->train() function. + (bigger values of this may make training go faster but won't affect + the result. However, too big a value will cause you to run out of + memory, obviously.) + !*/ + + void set_epsilon ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon() == eps + !*/ + + const scalar_type get_epsilon ( + ) const; + /*! + ensures + - returns the error epsilon that determines when training should stop. + Generally a good value for this is 0.001. Smaller values may result + in a more accurate solution but take longer to execute. + !*/ + + void set_epsilon_insensitivity ( + scalar_type eps + ); + /*! + requires + - eps > 0 + ensures + - #get_epsilon_insensitivity() == eps + !*/ + + const scalar_type get_epsilon_insensitivity ( + ) const; + /*! + ensures + - This object tries to find a function which minimizes the + regression error on a training set. This error is measured + in the following way: + - if (abs(predicted_value - true_labeled_value) < eps) then + - The error is 0. 
That is, any function which gets within + eps of the correct output is good enough. + - else + - The error grows linearly once it gets bigger than eps + + So epsilon-insensitive regression means we do regression but + stop trying to fit a data point once it is "close enough". + This function returns that eps value which controls what we + mean by "close enough". + !*/ + + void set_kernel ( + const kernel_type& k + ); + /*! + ensures + - #get_kernel() == k + !*/ + + const kernel_type& get_kernel ( + ) const; + /*! + ensures + - returns a copy of the kernel function in use by this object + !*/ + + void set_c ( + scalar_type C + ); + /*! + requires + - C > 0 + ensures + - #get_c() == C + !*/ + + const scalar_type get_c ( + ) const; + /*! + ensures + - returns the SVR regularization parameter. It is the parameter that + determines the trade-off between trying to reduce the training error + or allowing more errors but hopefully improving the generalization + of the resulting decision_function. Larger values encourage exact + fitting while smaller values of C may encourage better generalization. + !*/ + + template < + typename in_sample_vector_type, + typename in_scalar_vector_type + > + const decision_function<kernel_type> train ( + const in_sample_vector_type& x, + const in_scalar_vector_type& y + ) const; + /*! + requires + - is_learning_problem(x,y) == true + - x == a matrix or something convertible to a matrix via mat(). + Also, x should contain sample_type objects. + - y == a matrix or something convertible to a matrix via mat(). + Also, y should contain scalar_type objects. + ensures + - performs support vector regression given the training samples in x and + target values in y. + - returns a decision_function F with the following properties: + - F(new_x) == predicted y value + !*/ + + void swap ( + svr_trainer& item + ); + /*! 
+ ensures + - swaps *this and item + !*/ + }; + + template <typename K> + void swap ( + svr_trainer<K>& a, + svr_trainer<K>& b + ) { a.swap(b); } + /*! + provides a global swap + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_SVm_EPSILON_REGRESSION_TRAINER_ABSTRACT_ + + + diff --git a/ml/dlib/dlib/svm/track_association_function.h b/ml/dlib/dlib/svm/track_association_function.h new file mode 100644 index 000000000..bf5ef36c7 --- /dev/null +++ b/ml/dlib/dlib/svm/track_association_function.h @@ -0,0 +1,154 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#ifndef DLIB_TRACK_ASSOCiATION_FUNCTION_Hh_ +#define DLIB_TRACK_ASSOCiATION_FUNCTION_Hh_ + + +#include "track_association_function_abstract.h" +#include <vector> +#include <iostream> +#include "../algs.h" +#include "../serialize.h" +#include "assignment_function.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type + > + class feature_extractor_track_association + { + public: + typedef typename detection_type::track_type track_type; + typedef typename track_type::feature_vector_type feature_vector_type; + + typedef detection_type lhs_element; + typedef track_type rhs_element; + + feature_extractor_track_association() : num_dims(0), num_nonnegative(0) {} + + explicit feature_extractor_track_association ( + unsigned long num_dims_, + unsigned long num_nonnegative_ + ) : num_dims(num_dims_), num_nonnegative(num_nonnegative_) {} + + unsigned long num_features( + ) const { return num_dims; } + + unsigned long num_nonnegative_weights ( + ) const { return num_nonnegative; } + + void get_features ( + const detection_type& det, + const track_type& track, + feature_vector_type& feats + ) const + { + track.get_similarity_features(det, feats); + } + + friend void 
serialize (const feature_extractor_track_association& item, std::ostream& out) + { + serialize(item.num_dims, out); + serialize(item.num_nonnegative, out); + } + + friend void deserialize (feature_extractor_track_association& item, std::istream& in) + { + deserialize(item.num_dims, in); + deserialize(item.num_nonnegative, in); + } + + private: + unsigned long num_dims; + unsigned long num_nonnegative; + }; + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type_ + > + class track_association_function + { + public: + + typedef detection_type_ detection_type; + typedef typename detection_type::track_type track_type; + typedef assignment_function<feature_extractor_track_association<detection_type> > association_function_type; + + track_association_function() {} + + track_association_function ( + const association_function_type& assoc_ + ) : assoc(assoc_) + { + } + + const association_function_type& get_assignment_function ( + ) const + { + return assoc; + } + + void operator() ( + std::vector<track_type>& tracks, + const std::vector<detection_type>& dets + ) const + { + std::vector<long> assignments = assoc(dets, tracks); + std::vector<bool> updated_track(tracks.size(), false); + // now update all the tracks with the detections that associated to them. + for (unsigned long i = 0; i < assignments.size(); ++i) + { + if (assignments[i] != -1) + { + tracks[assignments[i]].update_track(dets[i]); + updated_track[assignments[i]] = true; + } + else + { + track_type new_track; + new_track.update_track(dets[i]); + tracks.push_back(new_track); + } + } + + // Now propagate all the tracks that didn't get any detections. 
+ for (unsigned long i = 0; i < updated_track.size(); ++i) + { + if (!updated_track[i]) + tracks[i].propagate_track(); + } + } + + friend void serialize (const track_association_function& item, std::ostream& out) + { + int version = 1; + serialize(version, out); + serialize(item.assoc, out); + } + friend void deserialize (track_association_function& item, std::istream& in) + { + int version = 0; + deserialize(version, in); + if (version != 1) + throw serialization_error("Unexpected version found while deserializing dlib::track_association_function."); + + deserialize(item.assoc, in); + } + + private: + + assignment_function<feature_extractor_track_association<detection_type> > assoc; + }; + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_TRACK_ASSOCiATION_FUNCTION_Hh_ + diff --git a/ml/dlib/dlib/svm/track_association_function_abstract.h b/ml/dlib/dlib/svm/track_association_function_abstract.h new file mode 100644 index 000000000..8a6fe153c --- /dev/null +++ b/ml/dlib/dlib/svm/track_association_function_abstract.h @@ -0,0 +1,271 @@ +// Copyright (C) 2014 Davis E. King (davis@dlib.net) +// License: Boost Software License See LICENSE.txt for the full license. +#undef DLIB_TRACK_ASSOCiATION_FUNCTION_ABSTRACT_Hh_ +#ifdef DLIB_TRACK_ASSOCiATION_FUNCTION_ABSTRACT_Hh_ + +#include <vector> +#include "assignment_function_abstract.h" + +namespace dlib +{ + +// ---------------------------------------------------------------------------------------- + + class example_detection + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the interface a detection must implement if it is to be + used with the track_association_function defined at the bottom of this + file. In this case, the interface is very simple. A detection object is + only required to define the track_type typedef and it must also be possible + to store detection objects in a std::vector. 
+ !*/ + + public: + // Each detection object should be designed to work with a specific track object. + // This typedef lets us determine which track type is meant for use with this + // detection object. + typedef class example_track track_type; + + }; + +// ---------------------------------------------------------------------------------------- + + class example_track + { + /*! + WHAT THIS OBJECT REPRESENTS + This object defines the interface a track must implement if it is to be + used with the track_association_function defined at the bottom of this + file. + !*/ + + public: + // This type should be a dlib::matrix capable of storing column vectors or an + // unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h. + typedef matrix_or_sparse_vector_type feature_vector_type; + + example_track( + ); + /*! + ensures + - this object is properly initialized + !*/ + + void get_similarity_features ( + const example_detection& det, + feature_vector_type& feats + ) const; + /*! + requires + - update_track() has been called on this track at least once. + ensures + - #feats == A feature vector that contains information describing how + likely it is that det is a detection from the object corresponding to + this track. That is, the feature vector should contain information that + lets someone decide if det should be associated to this track. + - #feats.size() must be a constant. That is, every time we call + get_similarity_features() it must output a feature vector of the same + dimensionality. + !*/ + + void update_track ( + const example_detection& det + ); + /*! + ensures + - Updates this track with the given detection assuming that det is the most + current observation of the object under track. + !*/ + + void propagate_track ( + ); + /*! + ensures + - propagates this track forward in time one time step. 
+ !*/ + }; + +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type + > + class feature_extractor_track_association + { + /*! + REQUIREMENTS ON detection_type + It must be an object that implements an interface compatible with the + example_detection discussed above. This also means that detection_type::track_type + must be an object that implements an interface compatible with example_track + defined above. + + WHAT THIS OBJECT REPRESENTS + This object is an adapter that converts from the detection/track style + interface defined above to the feature extraction interface required by the + association rule learning tools in dlib. Specifically, it converts the + detection/track interface into a form usable by the assignment_function and + its trainer object structural_assignment_trainer. + !*/ + + public: + typedef typename detection_type::track_type track_type; + typedef typename track_type::feature_vector_type feature_vector_type; + typedef detection_type lhs_element; + typedef track_type rhs_element; + + unsigned long num_features( + ) const; + /*! + ensures + - returns the dimensionality of the feature vectors produced by get_features(). + !*/ + + void get_features ( + const detection_type& det, + const track_type& track, + feature_vector_type& feats + ) const; + /*! + ensures + - performs: track.get_similarity_features(det, feats); + !*/ + }; + + template < + typename detection_type + > + void serialize ( + const feature_extractor_track_association<detection_type>& item, + std::ostream& out + ); + /*! + Provides serialization support. + !*/ + + template < + typename detection_type + > + void deserialize ( + feature_extractor_track_association<detection_type>& item, + std::istream& in + ); + /*! 
+ Provides deserialization support. + !*/ + +// ---------------------------------------------------------------------------------------- + + template < + typename detection_type_ + > + class track_association_function + { + /*! + REQUIREMENTS ON detection_type + It must be an object that implements an interface compatible with the + example_detection discussed above. This also means that detection_type::track_type + must be an object that implements an interface compatible with example_track + defined above. + + WHAT THIS OBJECT REPRESENTS + This object is a tool that helps you implement an object tracker. So for + example, if you wanted to track people moving around in a video then this + object can help. In particular, imagine you have a tool for detecting the + positions of each person in an image. Then you can run this person + detector on the video and at each time step, i.e. at each frame, you get a + set of person detections. However, that by itself doesn't tell you how + many people there are in the video and where they are moving to and from. + To get that information you need to figure out which detections match each + other from frame to frame. This is where the track_association_function + comes in. It performs the detection to track association. It will also do + some of the track management tasks like creating a new track when a + detection doesn't match any of the existing tracks. + + Internally, this object is implemented using the assignment_function object. + In fact, it's really just a thin wrapper around assignment_function and + exists just to provide a more convenient interface to users doing detection + to track association. + !*/ + public: + + typedef detection_type_ detection_type; + typedef typename detection_type::track_type track_type; + typedef assignment_function<feature_extractor_track_association<detection_type> > association_function_type; + + track_association_function( + ); + /*! 
+ ensures + - #get_assignment_function() will be default initialized. + !*/ + + track_association_function ( + const association_function_type& assoc + ); + /*! + ensures + - #get_assignment_function() == assoc + !*/ + + const association_function_type& get_assignment_function ( + ) const; + /*! + ensures + - returns the assignment_function used by this object to assign detections + to tracks. + !*/ + + void operator() ( + std::vector<track_type>& tracks, + const std::vector<detection_type>& dets + ) const; + /*! + ensures + - This function uses get_assignment_function() to assign each detection + in dets to its appropriate track in tracks. Then each track which + associates to a detection is updated by calling update_track() with the + associated detection. + - Detections that don't associate with any of the elements of tracks will + spawn new tracks. For each unassociated detection, this is done by + creating a new track_type object, calling update_track() on it with the + new detection, and then adding the new track into tracks. + - Tracks that don't have a detection associate to them are propagated + forward in time by calling propagate_track() on them. That is, we call + propagate_track() only on tracks that do not get associated with a + detection. + !*/ + }; + + template < + typename detection_type + > + void serialize ( + const track_association_function<detection_type>& item, + std::ostream& out + ); + /*! + Provides serialization support. + !*/ + + template < + typename detection_type + > + void deserialize ( + track_association_function<detection_type>& item, + std::istream& in + ); + /*! + Provides deserialization support. + !*/ + +// ---------------------------------------------------------------------------------------- + +} + +#endif // DLIB_TRACK_ASSOCiATION_FUNCTION_ABSTRACT_Hh_ + + |