Made the elastic_net inputs be in terms of trans(X)*X and trans(X)*Y rather
than raw X and Y matrices.

Made the elastic_net inputs be in terms of trans(X)*X and trans(X)*Y rather
than raw X and Y matrices.
dfd9543c · Davis King · 69a12074 · dfd9543c · dfd9543c · dfd9543c
Commit dfd9543c authored Apr 26, 2016 by Davis King
3 changed files
--- a/dlib/optimization/elastic_net.h
+++ b/dlib/optimization/elastic_net.h
@@ -17,13 +17,16 @@ namespace dlib

        template <typename EXP>
        explicit elastic_net(
-            const matrix_exp<EXP>& X_
+            const matrix_exp<EXP>& XX
        ) : eps(1e-5), max_iterations(50000), verbose(false)
        {
            // make sure requires clause is not broken
-            DLIB_ASSERT(X_.size() > 0,
-                "\t elastic_net::elastic_net(X)"
-                << " \n\t X can't be empty"
+            DLIB_ASSERT(XX.size() > 0 &&
+                        XX.nr() == XX.nc(),
+                "\t elastic_net::elastic_net(XX)"
+                << " \n\t XX must be a non-empty square matrix."
+                << " \n\t XX.nr():   " << XX.nr() 
+                << " \n\t XX.nc():   " << XX.nc() 
                << " \n\t this: " << this
                );

@@ -32,13 +35,11 @@ namespace dlib
            // rows then we can get rid of them by doing some SVD magic.  Doing this doesn't
            // make the final results of anything change but makes all the matrices have
            // dimensions that are X.nr() in size, which can be much smaller.
-            matrix<double> XX;
-            XX = X_*trans(X_);
            matrix<double,0,1> s;
            svd3(XX,u,eig_vals,eig_vects);
            s = sqrt(eig_vals);
            X = eig_vects*diagm(s);
-            u = trans(X_)*tmp(eig_vects*inv(diagm(s)));
+            u = eig_vects*inv(diagm(s));



@@ -65,46 +66,48 @@ namespace dlib

        template <typename EXP1, typename EXP2>
        elastic_net(
-            const matrix_exp<EXP1>& X_,
-            const matrix_exp<EXP2>& Y_
-        ) : elastic_net(X_)
+            const matrix_exp<EXP1>& XX,
+            const matrix_exp<EXP2>& XY
+        ) : elastic_net(XX)
        {
            // make sure requires clause is not broken
-            DLIB_ASSERT(X_.size() > 0 && 
-                        is_col_vector(Y_) && 
-                        X_.nc() == Y_.size() ,
-                "\t elastic_net::elastic_net(X,Y)"
+            DLIB_ASSERT(XX.size() > 0 && 
+                        XX.nr() == XX.nc() &&
+                        is_col_vector(XY) && 
+                        XX.nc() == XY.size() ,
+                "\t elastic_net::elastic_net(XX,XY)"
                << " \n\t Invalid inputs were given to this function."
-                << " \n\t X_.size(): " << X_.size() 
-                << " \n\t is_col_vector(Y_): " << is_col_vector(Y_) 
-                << " \n\t X_.nc():   " << X_.nc() 
-                << " \n\t Y_.size(): " << Y_.size() 
+                << " \n\t XX.size(): " << XX.size() 
+                << " \n\t is_col_vector(XY): " << is_col_vector(XY) 
+                << " \n\t XX.nr():   " << XX.nr() 
+                << " \n\t XX.nc():   " << XX.nc() 
+                << " \n\t XY.size(): " << XY.size() 
                << " \n\t this: " << this
                );

-            set_y(Y_);
+            set_xy(XY);
        }

        long size (
        ) const { return u.nr(); }

        template <typename EXP>
-        void set_y(
-            const matrix_exp<EXP>& Y_
+        void set_xy(
+            const matrix_exp<EXP>& XY
        )
        {
            // make sure requires clause is not broken
-            DLIB_ASSERT(is_col_vector(Y_) && 
-                        Y_.size() == size(),
+            DLIB_ASSERT(is_col_vector(XY) && 
+                        XY.size() == size(),
                "\t void elastic_net::set_y(Y)"
                << " \n\t Invalid inputs were given to this function."
-                << " \n\t is_col_vector(Y_): " << is_col_vector(Y_) 
+                << " \n\t is_col_vector(XY): " << is_col_vector(XY) 
                << " \n\t size():    " << size() 
-                << " \n\t Y_.size(): " << Y_.size() 
+                << " \n\t XY.size(): " << XY.size() 
                << " \n\t this: " << this
                );

-            Y = trans(u)*Y_;
+            Y = trans(u)*XY;
            // We can use the ynorm after it has been projected because the only place Y
            // appears in the algorithm is in terms of dot products with w and x vectors.
            // But those vectors are always in the span of X and therefore we only see the

--- a/dlib/optimization/elastic_net_abstract.h
+++ b/dlib/optimization/elastic_net_abstract.h
@@ -44,52 +44,61 @@ namespace dlib

        template <typename EXP>
        explicit elastic_net(
-            const matrix_exp<EXP>& X
+            const matrix_exp<EXP>& XX
        ); 
        /*!
            requires
-                - X.size() != 0
+                - XX.size() != 0
+                - XX.nr() == XX.nc()
            ensures
                - #get_epsilon() == 1e-5
                - #get_max_iterations() == 50000
-                - this object will not be verbose unless be_verbose() is called
-                - #size() == X.nc()
+                - This object will not be verbose unless be_verbose() is called.
+                - #size() == XX.nc()
                - #have_target_values() == false
+                - We interpret XX as trans(X)*X where X is as defined in the objective
+                  function discussed above in WHAT THIS OBJECT REPRESENTS.
        !*/

        template <typename EXP1, typename EXP2>
        elastic_net(
-            const matrix_exp<EXP1>& X,
-            const matrix_exp<EXP2>& Y
+            const matrix_exp<EXP1>& XX,
+            const matrix_exp<EXP2>& XY
        ); 
        /*!
            requires
-                - X.size() != 0
-                - is_col_vector(Y)
-                - X.nc() == Y.size()
+                - XX.size() != 0
+                - XX.nr() == XX.nc()
+                - is_col_vector(XY)
+                - XX.nc() == XY.size()
            ensures
-                - constructs this object by calling the elastic_net(X) constructor and then
-                  calling this->set_y(Y).
+                - constructs this object by calling the elastic_net(XX) constructor and
+                  then calling this->set_xy(XY).
                - #have_target_values() == true 
+                - We interpret XX as trans(X)*X where X is as defined in the objective
+                  function discussed above in WHAT THIS OBJECT REPRESENTS.  Similarly, XY
+                  should be trans(X)*Y.
        !*/

        long size (
        ) const; 
        /*!
            ensures
-                - returns the number of samples loaded into this object.  
+                - returns the dimensionality of the data loaded into this object.  That is,
+                  how many elements are in the optimal w vector?  This function returns
+                  that number.
        !*/

        bool have_target_values (
        ) const;
        /*!
            ensures
-                - returns true if set_y() has been called and false otherwise.
+                - returns true if set_xy() has been called and false otherwise.
        !*/

        template <typename EXP>
-        void set_y(
-            const matrix_exp<EXP>& Y
+        void set_xy(
+            const matrix_exp<EXP>& XY
        );
        /*!
            requires
@@ -97,8 +106,9 @@ namespace dlib
                - Y.size() == size()
            ensures
                - #have_target_values() == true
-                - Sets the target values, the Y variable in the objective function, to the
-                  given Y.
+                - Sets the target values of the regression.  Note that we expect the given
+                  matrix, XY, to be equal to trans(X)*Y, where X and Y have the definitions
+                  discussed above in WHAT THIS OBJECT REPRESENTS.
        !*/

        void set_epsilon(
@@ -164,6 +174,7 @@ namespace dlib
            ensures
                - Solves the optimization problem described in the WHAT THIS OBJECT
                  REPRESENTS section above and returns the optimal w.
+                - The returned vector has size() elements.
                - if (lasso_budget == infinity) then
                    - The lasso constraint is ignored 
        !*/

--- a/dlib/test/elastic_net.cpp
+++ b/dlib/test/elastic_net.cpp
@@ -95,7 +95,7 @@ namespace
            double lasso_budget = sum(abs(w));
            double eps = 0.0000001;

-            dlib::elastic_net solver(X,Y);
+            dlib::elastic_net solver(X*trans(X),X*Y);
            solver.set_epsilon(eps);