OpenDAS / dlib · Commits

Commit ae4677bd, authored Oct 03, 2015 by Davis King
parent 37278e99

    Moved dnn_trainer into its own file.

Showing 5 changed files with 339 additions and 305 deletions:

    dlib/dnn.h                    +1    -0
    dlib/dnn/core.h               +1    -211
    dlib/dnn/core_abstract.h      +1    -94
    dlib/dnn/trainer.h            +226  -0
    dlib/dnn/trainer_abstract.h   +110  -0
dlib/dnn.h

@@ -9,6 +9,7 @@
 #include "dnn/loss.h"
 #include "dnn/core.h"
 #include "dnn/solvers.h"
+#include "dnn/trainer.h"

 #endif // DLIB_DNn_
dlib/dnn/core.h

@@ -5,12 +5,12 @@
 #include "core_abstract.h"
 #include "tensor.h"
-#include "solvers.h"
 #include <iterator>
 #include <memory>
 #include <type_traits>
 #include "../statistics.h"
 #include "../rand.h"
+#include "../algs.h"
 #include <utility>
@@ -1402,216 +1402,6 @@ namespace dlib
         }
     }

 // ----------------------------------------------------------------------------------------
-    [210 lines deleted: the entire dnn_trainer class implementation, plus the
-     "// TODO, make dnn_trainer serializable." note.  The deleted block is
-     identical, token for token, to the class this commit adds in
-     dlib/dnn/trainer.h, reproduced in full below.]

 // ----------------------------------------------------------------------------------------

 }
dlib/dnn/core_abstract.h

@@ -4,7 +4,6 @@
 #ifdef DLIB_DNn_CORE_ABSTRACT_H_
 #include "tensor_abstract.h"
-#include "solvers_abstract.h"
 #include <memory>
 #include <type_traits>
 #include "../rand.h"

@@ -919,101 +918,9 @@ namespace dlib
         gradients and compares them to the outputs of the layer.
     !*/

 // ----------------------------------------------------------------------------------------
-    [92 lines deleted: the dnn_trainer declaration and its specification
-     comments -- identical to the contents this commit adds in
-     dlib/dnn/trainer_abstract.h, reproduced in full below.]
 // ----------------------------------------------------------------------------------------

 }

 #endif // DLIB_DNn_CORE_ABSTRACT_H_
dlib/dnn/trainer.h  (new file, mode 100644)

// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_DNn_TRAINER_H_
#define DLIB_DNn_TRAINER_H_

#include "trainer_abstract.h"
#include "core.h"
#include "solvers.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename solver_type = sgd
        >
    class dnn_trainer
    {
    public:

        static_assert(is_loss_layer_type<net_type>::value,
            "The last layer in a network must be a loss layer.");

        typedef typename net_type::label_type label_type;
        typedef typename net_type::input_type input_type;

        dnn_trainer()
        {
            init();
        }

        explicit dnn_trainer(const net_type& net_) : net(net_)
        {
            init();
        }

        dnn_trainer(const net_type& net_, const solver_type& solver_) : net(net_), solvers(solver_)
        {
            init();
        }

        const net_type& get_net() const { return net; }

        // Note: the diff reads "return net = net_;", which is ill-formed in a
        // function returning void; the stray return is dropped here.
        void set_net(const net_type& net_) { net = net_; }

        void set_solver(const solver_type& solver_) { solvers = solver_; }

        unsigned long get_mini_batch_size() const { return mini_batch_size; }

        void set_mini_batch_size(unsigned long batch_size)
        {
            DLIB_CASSERT(batch_size > 0, "");
            mini_batch_size = batch_size;
        }

        unsigned long get_num_epochs() const { return num_epochs; }

        // Note: the diff declares this member const even though it assigns to
        // num_epochs, which would not compile; the const is dropped here.
        void set_num_epochs(unsigned long num)
        {
            DLIB_CASSERT(num > 0, "");
            num_epochs = num;
        }

        const sstack<solver_type, net_type::num_layers>& get_solvers() const { return solvers; }

        sstack<solver_type, net_type::num_layers>& get_solvers() { return solvers; }

        const net_type& train(
            const std::vector<input_type>& data,
            const std::vector<label_type>& labels
        )
        {
            DLIB_CASSERT(data.size() == labels.size() && data.size() > 0, "");

            resizable_tensor t1, t2;

            for (unsigned long epoch_iteration = 0; epoch_iteration < num_epochs; ++epoch_iteration)
            {
                unsigned long j = 0;
                // Load two tensors worth of data at once so we can overlap the computation
                // and data transfer between the host and the device.
                if (j < data.size())
                {
                    net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t1);
                    j += mini_batch_size;
                }
                if (j < data.size())
                {
                    net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t2);
                    j += mini_batch_size;
                }

                unsigned long i = 0;
                while (i < data.size())
                {
                    net.update(t1, labels.begin()+i, solvers);
                    i += mini_batch_size;
                    if (j < data.size())
                    {
                        net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t1);
                        j += mini_batch_size;
                    }

                    if (i < data.size())
                    {
                        net.update(t2, labels.begin()+i, solvers);
                        i += mini_batch_size;
                        if (j < data.size())
                        {
                            net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t2);
                            j += mini_batch_size;
                        }
                    }
                }
            }
            return net;
        }

        const net_type& train(
            const std::vector<input_type>& data
        )
        {
            DLIB_CASSERT(data.size() > 0, "");

            const bool has_unsupervised_loss = std::is_same<no_label_type, label_type>::value;
            static_assert(has_unsupervised_loss,
                "You can only call this version of train() when using an unsupervised loss.");

            resizable_tensor t1, t2;

            for (unsigned long epoch_iteration = 0; epoch_iteration < num_epochs; ++epoch_iteration)
            {
                unsigned long j = 0;
                // Load two tensors worth of data at once so we can overlap the computation
                // and data transfer between the host and the device.
                if (j < data.size())
                {
                    net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t1);
                    j += mini_batch_size;
                }
                if (j < data.size())
                {
                    net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t2);
                    j += mini_batch_size;
                }

                unsigned long i = 0;
                while (i < data.size())
                {
                    net.update(t1, solvers);
                    i += mini_batch_size;
                    if (j < data.size())
                    {
                        net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t1);
                        j += mini_batch_size;
                    }

                    if (i < data.size())
                    {
                        net.update(t2, solvers);
                        i += mini_batch_size;
                        if (j < data.size())
                        {
                            net.to_tensor(data.begin()+j, data.begin()+std::min(j+mini_batch_size, data.size()), t2);
                            j += mini_batch_size;
                        }
                    }
                }
            }
            return net;
        }

    private:

        void init()
        {
            num_epochs = 300;
            mini_batch_size = 11;
        }

        unsigned long num_epochs;
        unsigned long mini_batch_size;

        net_type net;
        sstack<solver_type, net_type::num_layers> solvers;
    };

    // TODO, make dnn_trainer serializable.

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_TRAINER_H_
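Both train() overloads above use the same double-buffered pipeline: two staging tensors, t1 and t2, are each filled one mini-batch ahead of the update() calls so that host-to-device transfer can overlap with computation. Below is a minimal, self-contained sketch of just that control flow; load_batch and the "update" printouts are hypothetical stand-ins for net.to_tensor() and net.update(), not dlib calls, and the actual overlap depends on the device doing transfers asynchronously.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    const std::vector<int> data(25, 0);      // 25 samples -> batches of 11, 11, 3
    const std::size_t mini_batch_size = 11;  // matches the trainer's default of 11

    std::vector<int> t1, t2;                 // the two staging buffers

    // Hypothetical stand-in for net.to_tensor(): copy one mini-batch into a buffer.
    auto load_batch = [&](std::size_t j, std::vector<int>& t) {
        const std::size_t end = std::min(j + mini_batch_size, data.size());
        t.assign(data.begin() + j, data.begin() + end);
        std::cout << "load   [" << j << ", " << end << ")\n";
    };

    std::size_t j = 0;
    // Prime both buffers before the main loop, exactly as train() does.
    if (j < data.size()) { load_batch(j, t1); j += mini_batch_size; }
    if (j < data.size()) { load_batch(j, t2); j += mini_batch_size; }

    std::size_t i = 0;
    while (i < data.size())
    {
        // Process t1 (stand-in for net.update()), then refill it behind the computation.
        std::cout << "update " << t1.size() << " samples\n";
        i += mini_batch_size;
        if (j < data.size()) { load_batch(j, t1); j += mini_batch_size; }

        if (i < data.size())
        {
            // Same for t2: process one buffer while the other's next batch is in flight.
            std::cout << "update " << t2.size() << " samples\n";
            i += mini_batch_size;
            if (j < data.size()) { load_batch(j, t2); j += mini_batch_size; }
        }
    }
}

With 25 samples and the default mini-batch size of 11 this processes batches of 11, 11, and 3, mirroring the j/i bookkeeping in train() exactly.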
dlib/dnn/trainer_abstract.h  (new file, mode 100644)

// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_DNn_TRAINER_ABSTRACT_H_
#ifdef DLIB_DNn_TRAINER_ABSTRACT_H_

#include "core_abstract.h"
#include "solvers_abstract.h"
#include <vector>

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename solver_type = sgd
        >
    class dnn_trainer
    {
        /*!
            REQUIREMENTS ON net_type
                - net_type is an add_loss_layer object.

            REQUIREMENTS ON solver_type
                - solver_type is an implementation of the EXAMPLE_SOLVER interface defined
                  in solvers_abstract.h

            WHAT THIS OBJECT REPRESENTS
        !*/

    public:

        typedef typename net_type::label_type label_type;
        typedef typename net_type::input_type input_type;

        dnn_trainer();

        explicit dnn_trainer(const net_type& net);

        dnn_trainer(const net_type& net, const solver_type& solver);

        const net_type& get_net() const;

        void set_net(const net_type& net);

        void set_solver(const solver_type& solver_);

        const sstack<solver_type, net_type::num_layers>& get_solvers() const;

        sstack<solver_type, net_type::num_layers>& get_solvers();

        unsigned long get_mini_batch_size() const;

        void set_mini_batch_size(unsigned long batch_size);

        unsigned long get_num_epochs() const;

        // Note: declared const in the diff; dropped here to match the fix in trainer.h.
        void set_num_epochs(unsigned long num);

        const net_type& train(
            const std::vector<input_type>& data,
            const std::vector<label_type>& labels
        );
        /*!
            requires
                - data.size() == labels.size()
                - TODO: the net has a supervised loss layer.
        !*/

        const net_type& train(
            const std::vector<input_type>& data
        );
        /*!
            requires
                - TODO: the net has an unsupervised loss layer.
            ensures
                - trains an auto-encoder
        !*/
    };

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_TRAINER_ABSTRACT_H_
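For orientation, here is a hedged sketch of how the interface specified above would be driven. The diff shows only the trainer, not the layer syntax available at this commit, so my_net_type below is a placeholder for any network type satisfying is_loss_layer_type, and the parameter values are arbitrary examples rather than recommendations. Note the compile-time guards the implementation provides: is_loss_layer_type rejects non-loss networks at the class level, and the unsupervised train(data) overload static_asserts that label_type is no_label_type.

#include <vector>
#include <dlib/dnn.h>   // per the dlib/dnn.h hunk above, this now pulls in the trainer

// Generic driver: my_net_type stands in for any concrete network whose last
// layer is a loss layer (dnn_trainer's static_assert enforces this).
template <typename my_net_type>
my_net_type train_sketch(
    const std::vector<typename my_net_type::input_type>& data,
    const std::vector<typename my_net_type::label_type>& labels
)
{
    dlib::dnn_trainer<my_net_type> trainer;  // solver_type defaults to sgd
    trainer.set_mini_batch_size(32);         // default is 11
    trainer.set_num_epochs(10);              // default is 300

    // Supervised overload: requires data.size() == labels.size() > 0.
    // For an unsupervised loss (label_type == no_label_type), one would call
    // trainer.train(data) instead, e.g. to train an auto-encoder.
    trainer.train(data, labels);
    return trainer.get_net();                // copy of the trained network
}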