Added a relative epsilon termination option to svm_c_linear_trainer

59d1b9d8 · Davis King · 45731b86 · 59d1b9d8 · 59d1b9d8
Commit 59d1b9d8 authored Jan 12, 2020 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 63 additions and 13 deletions

dlib/svm/svm_c_linear_trainer.h dlib/svm/svm_c_linear_trainer.h +33 -13

dlib/svm/svm_c_linear_trainer_abstract.h dlib/svm/svm_c_linear_trainer_abstract.h +30 -0

No files found.
--- a/dlib/svm/svm_c_linear_trainer.h
+++ b/dlib/svm/svm_c_linear_trainer.h
@@ -44,6 +44,7 @@ namespace dlib
            const in_scalar_vector_type& labels_,
            const bool be_verbose_,
            const scalar_type eps_,
+            const scalar_type relative_eps_,
            const unsigned long max_iter,
            const unsigned long dims_
        ) :
@@ -54,6 +55,7 @@ namespace dlib
            Cneg(C_neg/C),
            be_verbose(be_verbose_),
            eps(eps_),
+            relative_eps(relative_eps_),
            max_iterations(max_iter),
            dims(dims_)
        {
@@ -98,6 +100,11 @@ namespace dlib
            if (num_iterations >= max_iterations)
                return true;
+            // relative eps test: stop once the risk gap is at most relative_eps times the current risk
+            if (current_risk_gap <= relative_eps * current_risk_value)
+                return true;
+            // absolute eps test: stop once the risk gap drops below eps
            if (current_risk_gap < eps)
                return true;
@@ -301,6 +308,7 @@ namespace dlib
        const bool be_verbose;
        const scalar_type eps;
+        const scalar_type relative_eps;
        const unsigned long max_iterations;
        const unsigned long dims;
    };
@@ -320,12 +328,13 @@ namespace dlib
        const in_scalar_vector_type& labels,
        const bool be_verbose,
        const scalar_type eps,
+        const scalar_type relative_eps,
        const unsigned long max_iterations,
        const unsigned long dims
    )
    {
        return oca_problem_c_svm<matrix_type, in_sample_vector_type, in_scalar_vector_type>(
-            C_pos, C_neg, samples, labels, be_verbose, eps, max_iterations, dims);
+            C_pos, C_neg, samples, labels, be_verbose, eps, relative_eps, max_iterations, dims);
    }
 // ----------------------------------------------------------------------------------------
@@ -350,16 +359,7 @@ namespace dlib
                             is_same_type<K, sparse_linear_kernel<sample_type> >::value ));
        svm_c_linear_trainer (
-        )
+        ) : svm_c_linear_trainer(1.0) {}
-        {
-            Cpos = 1;
-            Cneg = 1;
-            verbose = false;
-            eps = 0.001;
-            max_iterations = 10000;
-            learn_nonnegative_weights = false;
-            last_weight_1 = false;
-        }
        explicit svm_c_linear_trainer (
            const scalar_type& C 
@@ -377,6 +377,7 @@ namespace dlib
            Cneg = C;
            verbose = false;
            eps = 0.001;
+            relative_eps = 0.0001;
            max_iterations = 10000;
            learn_nonnegative_weights = false;
            last_weight_1 = false;
@@ -400,6 +401,24 @@ namespace dlib
        const scalar_type get_epsilon (
        ) const { return eps; }
+        void set_relative_epsilon (
+            scalar_type eps_
+        )
+        {
+            // Check the precondition eps_ > 0 via DLIB_ASSERT before storing the new value.
+            DLIB_ASSERT(eps_ > 0,
+                "\t void svm_c_linear_trainer::set_relative_epsilon()"
+                << "\n\t eps_ must be greater than 0"
+                << "\n\t eps_: " << eps_ 
+                << "\n\t this: " << this
+                );
+            relative_eps = eps_;  // later read back by get_relative_epsilon()
+        }
+        const scalar_type get_relative_epsilon (
+        ) const { return relative_eps; }  // current relative stopping tolerance
        unsigned long get_max_iterations (
        ) const { return max_iterations; }
@@ -654,14 +673,14 @@ namespace dlib
                                                                         mat(prior_b));
                svm_objective = solver(
-                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, dims), 
+                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, relative_eps, max_iterations, dims), 
                    w,
                    prior_temp);
            }
            else
            {
                svm_objective = solver(
-                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, max_iterations, num_dims), 
+                    make_oca_problem_c_svm<w_type>(Cpos, Cneg, x, y, verbose, eps, relative_eps, max_iterations, num_dims), 
                    w,
                    num_nonnegative,
                    force_weight_1_idx);
@@ -687,6 +706,7 @@ namespace dlib
        scalar_type Cneg;
        oca solver;
        scalar_type eps;
+        scalar_type relative_eps;
        bool verbose;
        unsigned long max_iterations;
        bool learn_nonnegative_weights;

--- a/dlib/svm/svm_c_linear_trainer_abstract.h
+++ b/dlib/svm/svm_c_linear_trainer_abstract.h
@@ -50,6 +50,7 @@ namespace dlib
                - #get_c_class1() == 1
                - #get_c_class2() == 1
                - #get_epsilon() == 0.001
+                - #get_relative_epsilon() == 0.0001
                - this object will not be verbose unless be_verbose() is called
                - #get_max_iterations() == 10000
                - #learns_nonnegative_weights() == false
@@ -70,6 +71,7 @@ namespace dlib
                - #get_c_class1() == C
                - #get_c_class2() == C
                - #get_epsilon() == 0.001
+                - #get_relative_epsilon() == 0.0001
                - this object will not be verbose unless be_verbose() is called
                - #get_max_iterations() == 10000
                - #learns_nonnegative_weights() == false
@@ -96,6 +98,34 @@ namespace dlib
                  train.  You can think of this epsilon value as saying "solve the
                  optimization problem until the probability of misclassification is within
                  epsilon of its optimal value".  
+                  In particular, the solver will terminate when the risk is within get_epsilon()
+                  of optimal.  I.e. it stops if the "risk gap" is less than get_epsilon().
+        !*/
+        void set_relative_epsilon (
+            scalar_type eps
+        );
+        /*!
+            requires
+                - eps > 0
+            ensures
+                - #get_relative_epsilon() == eps 
+        !*/
+        const scalar_type get_relative_epsilon (
+        ) const;
+        /*!
+            ensures
+                - returns the relative error epsilon that determines when training should stop.
+                  Smaller values may result in a more accurate solution but take longer to
+                  train.  In particular, the solver will terminate when the ratio of the risk
+                  gap to the current risk is no greater than get_relative_epsilon().
+                  This means that, unlike get_epsilon(), get_relative_epsilon() is a relative
+                  measure of accuracy.  To say this another way, the solver terminates when the
+                  possible improvement in risk is at most a get_relative_epsilon() fraction of
+                  the current risk.
        !*/
        void set_max_iterations (