1 files changed, 0 insertions, 348 deletions
diff --git a/ml/dlib/dlib/svm/structural_svm_problem_abstract.h b/ml/dlib/dlib/svm/structural_svm_problem_abstract.h
deleted file mode 100644
index 20b3d73a7..000000000
--- a/ml/dlib/dlib/svm/structural_svm_problem_abstract.h
+++ /dev/null
@@ -1,348 +0,0 @@
-// Copyright (C) 2011  Davis E. King (davis@dlib.net)
-// License: Boost Software License   See LICENSE.txt for the full license.
-#undef DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_
-#ifdef DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_
-
-#include "../optimization/optimization_oca_abstract.h"
-#include "sparse_vector_abstract.h"
-#include "../matrix.h"
-
-namespace dlib
-{
-
-// ----------------------------------------------------------------------------------------
-
-    template <
-        typename matrix_type_,
-        typename feature_vector_type_ = matrix_type_
-        >
-    class structural_svm_problem : public oca_problem<matrix_type_> 
-    {
-    public:
-        /*!
-            REQUIREMENTS ON matrix_type_
-                - matrix_type_ == a dlib::matrix capable of storing column vectors
-
-            REQUIREMENTS ON feature_vector_type_ 
-                - feature_vector_type_ == a dlib::matrix capable of storing column vectors
-                  or an unsorted sparse vector type as defined in dlib/svm/sparse_vector_abstract.h.
-
-            INITIAL VALUE
-                - get_epsilon() == 0.001
-                - get_max_iterations() == 10000
-                - get_max_cache_size() == 5
-                - get_c() == 1
-                - get_cache_based_epsilon() == std::numeric_limits<scalar_type>::infinity()
-                  (I.e. the cache based epsilon feature is disabled)
-                - num_nuclear_norm_regularizers() == 0
-                - This object will not be verbose
-
-            WHAT THIS OBJECT REPRESENTS
-                This object is a tool for solving the optimization problem associated with
-                a structural support vector machine.  A structural SVM is a supervised
-                machine learning method for learning to predict complex outputs.  This is
-                contrasted with a binary classifier which makes only simple yes/no
-                predictions.  A structural SVM, on the other hand, can learn to predict
-                complex outputs such as entire parse trees or DNA sequence alignments.  To
-                do this, it learns a function F(x,y) which measures how well a particular
-                data sample x matches a label y.  When used for prediction, the best label
-                for a new x is given by the y which maximizes F(x,y).   
-
-                To use this object you inherit from it, provide implementations of its four
-                pure virtual functions, and then pass your object to the oca optimizer.
-                Also, you should only pass an instance of this object to the oca optimizer
-                once.  That is, the act of using a structural_svm_problem instance with the
-                oca solver "uses" the structural_svm_problem instance.  If you want to
-                solve the same problem multiple times then you must use a fresh instance of
-                your structural_svm_problem.
-
-
-                To define the optimization problem precisely, we first introduce some notation:
-                    - let PSI(x,y)    == the joint feature vector for input x and a label y.
-                    - let F(x,y|w)    == dot(w,PSI(x,y)).  
-                    - let LOSS(idx,y) == the loss incurred for predicting that the idx-th training 
-                      sample has a label of y.  Note that LOSS() should always be >= 0 and should
-                      become exactly 0 when y is the correct label for the idx-th sample.
-                    - let x_i == the i-th training sample.
-                    - let y_i == the correct label for the i-th training sample.
-                    - The number of data samples is N.
-
-                Then the optimization problem solved using this object is the following:
-                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
-
-                    Where R(w) == sum from i=1 to N: 1/N * sample_risk(i,w)
-                    and sample_risk(i,w) == max over all Y: LOSS(i,Y) + F(x_i,Y|w) - F(x_i,y_i|w)
-                    and C > 0
-
-                
-
-                For an introduction to structured support vector machines you should consult 
-                the following paper: 
-                    Predicting Structured Objects with Support Vector Machines by 
-                    Thorsten Joachims, Thomas Hofmann, Yisong Yue, and Chun-nam Yu
-
-                For a more detailed discussion of the particular algorithm implemented by this
-                object see the following paper:  
-                    T. Joachims, T. Finley, Chun-Nam Yu, Cutting-Plane Training of Structural SVMs, 
-                    Machine Learning, 77(1):27-59, 2009.
-
-                    Note that this object is essentially a tool for solving the 1-Slack structural
-                    SVM with margin-rescaling.  Specifically, see Algorithm 3 in the above referenced 
-                    paper.
-        !*/
-
-        typedef matrix_type_ matrix_type;
-        typedef typename matrix_type::type scalar_type;
-        typedef feature_vector_type_ feature_vector_type;
-
-        structural_svm_problem (
-        );
-        /*!
-            ensures
-                - this object is properly initialized
-        !*/
-
-        void set_epsilon (
-            scalar_type eps
-        );
-        /*!
-            requires
-                - eps > 0
-            ensures
-                - #get_epsilon() == eps
-        !*/
-
-        const scalar_type get_epsilon (
-        ) const;
-        /*!
-            ensures
-                - returns the error epsilon that determines when training should stop.
-                  Smaller values may result in a more accurate solution but take longer 
-                  to execute.  Specifically, the algorithm stops when the average sample
-                  risk (i.e. R(w) as defined above) is within epsilon of its optimal value.
-
-                  Also note that sample risk is an upper bound on a sample's loss.  So
-                  you can think of this epsilon value as saying "solve the optimization
-                  problem until the average loss per sample is within epsilon of it's 
-                  optimal value".
-        !*/
-
-        scalar_type get_cache_based_epsilon (
-        ) const;
-        /*!
-            ensures
-                - if (get_max_cache_size() != 0) then
-                    - The solver will not stop when the average sample risk is within
-                      get_epsilon() of its optimal value.  Instead, it will keep running
-                      but will run the optimizer completely on the cache until the average
-                      sample risk is within #get_cache_based_epsilon() of its optimal
-                      value.  This means that it will perform this additional refinement in
-                      the solution accuracy without making any additional calls to the
-                      separation_oracle().  This is useful when using a nuclear norm
-                      regularization term because it allows you to quickly solve the
-                      optimization problem to a high precision, which in the case of a
-                      nuclear norm regularized problem means that many of the learned
-                      matrices will be low rank or very close to low rank due to the
-                      nuclear norm regularizer.  This may not happen without solving the
-                      problem to a high accuracy or their ranks may be difficult to
-                      determine, so the extra accuracy given by the cache based refinement
-                      is very useful.  Finally, note that we include the nuclear norm term
-                      as part of the "risk" for the purposes of determining when to stop.  
-                - else
-                    - The value of #get_cache_based_epsilon() has no effect.
-        !*/
-
-        void set_cache_based_epsilon (
-            scalar_type eps
-        );
-        /*!
-            requires
-                - eps > 0
-            ensures
-                - #get_cache_based_epsilon() == eps
-        !*/
-
-        void set_max_iterations (
-            unsigned long max_iter
-        );
-        /*!
-            ensures
-                - #get_max_iterations() == max_iter
-        !*/
-
-        unsigned long get_max_iterations (
-        ); 
-        /*!
-            ensures
-                - returns the maximum number of iterations the SVM optimizer is allowed to
-                  run before it is required to stop and return a result.
-        !*/
-
-        void set_max_cache_size (
-            unsigned long max_size
-        );
-        /*!
-            ensures
-                - #get_max_cache_size() == max_size
-        !*/
-
-        unsigned long get_max_cache_size (
-        ) const; 
-        /*!
-            ensures
-                - Returns the number of joint feature vectors per training sample kept in 
-                  the separation oracle cache.  This cache is used to avoid unnecessary 
-                  calls to the user supplied separation_oracle() function.  Note that a 
-                  value of 0 means that caching is not used at all.  This is appropriate 
-                  if the separation oracle is cheap to evaluate. 
-        !*/
-
-        void add_nuclear_norm_regularizer (
-            long first_dimension,
-            long rows,
-            long cols,
-            double regularization_strength
-        );
-        /*!
-            requires
-                - 0 <= first_dimension < get_num_dimensions()
-                - 0 <= rows
-                - 0 <= cols
-                - first_dimension+rows*cols <= get_num_dimensions()
-                - 0 < regularization_strength
-            ensures
-                - Adds a nuclear norm regularization term to the optimization problem
-                  solved by this object.  That is, instead of solving:
-                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w)
-                  this object will solve:
-                    Minimize: h(w) == 0.5*dot(w,w) + C*R(w) + regularization_strength*nuclear_norm_of(part of w)
-                  where "part of w" is the part of w indicated by the arguments to this
-                  function. In particular, the part of w included in the nuclear norm is
-                  exactly the matrix reshape(rowm(w, range(first_dimension, first_dimension+rows*cols-1)), rows, cols).
-                  Therefore, if you think of the w vector as being the concatenation of a
-                  bunch of matrices then you can use multiple calls to add_nuclear_norm_regularizer() 
-                  to add nuclear norm regularization terms to any of the matrices packed into w.
-                - #num_nuclear_norm_regularizers() == num_nuclear_norm_regularizers() + 1
-        !*/
-
-        unsigned long num_nuclear_norm_regularizers (
-        ) const; 
-        /*!
-            ensures
-                - returns the number of nuclear norm regularizers that are currently a part
-                  of this optimization problem.  That is, returns the number of times
-                  add_nuclear_norm_regularizer() has been called since the last call to
-                  clear_nuclear_norm_regularizers() or object construction, whichever is
-                  most recent.
-        !*/
-
-        void clear_nuclear_norm_regularizers (
-        );
-        /*!
-            ensures
-                - #num_nuclear_norm_regularizers() == 0
-        !*/
-
-        void be_verbose (
-        );
-        /*!
-            ensures
-                - This object will print status messages to standard out so that a 
-                  user can observe the progress of the algorithm.
-        !*/
-
-        void be_quiet(
-        );
-        /*!
-            ensures
-                - this object will not print anything to standard out
-        !*/
-
-        scalar_type get_c (
-        ) const; 
-        /*!
-            ensures
-                - returns the SVM regularization parameter.  It is the parameter that 
-                  determines the trade off between trying to fit the training data 
-                  exactly or allowing more errors but hopefully improving the 
-                  generalization of the resulting classifier.  Larger values encourage 
-                  exact fitting while smaller values of C may encourage better 
-                  generalization. 
-        !*/
-
-        void set_c (
-            scalar_type C
-        );
-        /*!
-            requires
-                - C > 0
-            ensures
-                - #get_c() == C
-        !*/
-
-    // --------------------------------
-    //     User supplied routines
-    // --------------------------------
-
-        virtual long get_num_dimensions (
-        ) const = 0;
-        /*!
-            ensures
-                - returns the dimensionality of a joint feature vector
-        !*/
-
-        virtual long get_num_samples (
-        ) const = 0;
-        /*!
-            ensures
-                - returns the number of training samples in this problem. 
-        !*/
-
-        virtual void get_truth_joint_feature_vector (
-            long idx,
-            feature_vector_type& psi 
-        ) const = 0;
-        /*!
-            requires
-                - 0 <= idx < get_num_samples()
-            ensures
-                - #psi == PSI(x_idx, y_idx)
-                  (i.e. the joint feature vector for the idx-th training sample its true label.)
-        !*/
-
-        virtual void separation_oracle (
-            const long idx,
-            const matrix_type& current_solution,
-            scalar_type& loss,
-            feature_vector_type& psi
-        ) const = 0;
-        /*!
-            requires
-                - 0 <= idx < get_num_samples()
-                - current_solution.size() == get_num_dimensions()
-            ensures
-                - runs the separation oracle on the idx-th sample.  We define this as follows: 
-                    - let X           == the idx-th training sample.
-                    - let PSI(X,y)    == the joint feature vector for input X and an arbitrary label y.
-                    - let F(X,y)      == dot(current_solution,PSI(X,y)).  
-                    - let LOSS(idx,y) == the loss incurred for predicting that the idx-th sample
-                      has a label of y.  Note that LOSS() should always be >= 0 and should
-                      become exactly 0 when y is the correct label for the idx-th sample.
-
-                        Then the separation oracle finds a Y such that: 
-                            Y = argmax over all y: LOSS(idx,y) + F(X,y) 
-                            (i.e. It finds the label which maximizes the above expression.)
-
-                        Finally, we can define the outputs of this function as:
-                        - #loss == LOSS(idx,Y) 
-                        - #psi == PSI(X,Y) 
-        !*/
-    };
-
-// ----------------------------------------------------------------------------------------
-
-}
-
-#endif // DLIB_STRUCTURAL_SVM_PRObLEM_ABSTRACT_Hh_
-
-