"src/array/vscode:/vscode.git/clone" did not exist on "e28f07812bae274f5ba49d26629de5541c9cff60"
Commit f335ce4f authored by Davis King

Adding a rough initial version of a deep learning API.

parent 16ea6f11
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_
#define DLIB_DNn_
#include "dnn/tensor.h"
#include "dnn/input.h"
#include "dnn/layers.h"
#include "dnn/loss.h"
#include "dnn/core.h"
#include "dnn/solvers.h"
#endif // DLIB_DNn_
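Taken together these headers pull in every piece of the new API: tensors, input layers, computational layers, losses, the core add_layer/add_loss plumbing, and solvers. As a rough sketch of how they are meant to compose, each layer template wraps the sub-network feeding it and the loss wraps the whole stack. The alias below is only an illustration of that composition, inferred from the templates in dnn/layers.h and dnn/loss.h; the training interface itself lives in dnn/core.h, whose diff is collapsed below, so treat this as an assumption rather than a confirmed usage pattern.

// Sketch only (assumption, not part of this commit): a network type built by
// nesting the layer templates around an input layer and wrapping it in a loss.
#include <dlib/dnn.h>
using namespace dlib;

using net_type = loss_binary_hinge<fc<relu<fc<input<matrix<float,0,1>>>>>>;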
(The diffs for two more files in this commit are collapsed and not shown.)
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_INPUT_H_
#define DLIB_DNn_INPUT_H_
#include <dlib/matrix.h>
#include <dlib/pixel.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename T>
class input
{
public:
// sample_expansion_factor must be > 0
const static unsigned int sample_expansion_factor = 1;
typedef T input_type;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
ensures
- Converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// TODO: this generic version is still a stub. It should initialize data to the
// right size to contain the stuff in the iterator range and then copy each
// element into its sample slot; the loop below only sketches that.
for (input_iterator i = begin; i != end; ++i)
{
matrix<rgb_pixel> temp = *i;
// now copy *i into the right part of data.
}
}
};
// ----------------------------------------------------------------------------------------
template <typename T,long NR, typename MM, typename L>
class input<matrix<T,NR,1,MM,L>>
{
public:
// TODO, maybe we should only allow T to be float? Seems kinda pointless to allow
// double. Don't forget to remove the matrix_cast if we enforce just float.
typedef matrix<T,NR,1,MM,L> input_type;
const static unsigned int sample_expansion_factor = 1;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over input_type objects.
ensures
- converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*sample_expansion_factor.
!*/
{
// initialize data to the right size to contain the stuff in the iterator range.
data.set_size(std::distance(begin,end), 1, 1, begin->size());
unsigned long idx = 0;
for (input_iterator i = begin; i != end; ++i)
{
data.set_sample(idx++, matrix_cast<float>(*i));
}
}
};
// ----------------------------------------------------------------------------------------
template <typename T>
class input2
{
public:
input2(){}
input2(const input<T>&) {}
typedef T input_type;
const static unsigned int sample_expansion_factor = 1;
template <typename input_iterator>
void to_tensor (
input_iterator begin,
input_iterator end,
resizable_tensor& data
) const
/*!
requires
- [begin, end) is an iterator range over T objects.
ensures
- converts the iterator range into a tensor and stores it into #data.
- Normally you would have #data.num_samples() == distance(begin,end) but
you can also expand the output by some integer factor so long as the loss
you use can deal with it correctly.
- #data.num_samples() == distance(begin,end)*K where K is an integer >= 1.
!*/
{
// TODO: this generic version is still a stub. It should initialize data to the
// right size to contain the stuff in the iterator range and then copy each
// element into its sample slot; the loop below only sketches that.
for (input_iterator i = begin; i != end; ++i)
{
matrix<rgb_pixel> temp = *i;
// now copy *i into the right part of data.
}
}
};
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_INPUT_H_
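The input<matrix<T,NR,1,MM,L>> specialization above is the only input converter that is actually implemented in this commit; the generic input<T> and input2<T> versions are still stubs. Below is a short usage sketch, written for this note rather than taken from the commit, of how the implemented specialization packs a range of column vectors into a tensor.

// Illustration only; the function name example_to_tensor is made up.
#include <vector>
#include <dlib/dnn.h>
using namespace dlib;

void example_to_tensor()
{
    std::vector<matrix<float,0,1>> samples(2);
    samples[0] = ones_matrix<float>(3,1);        // two 3-dimensional column vectors
    samples[1] = 2.0f*ones_matrix<float>(3,1);

    input<matrix<float,0,1>> inp;
    resizable_tensor data;
    inp.to_tensor(samples.begin(), samples.end(), data);

    // data.num_samples() == 2 and each sample occupies nr()*nc()*k() == 3 floats,
    // so mat(data) views the tensor as a 2x3 matrix whose rows are the samples.
}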
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_LAYERS_H_
#define DLIB_DNn_LAYERS_H_
#include "layers_abstract.h"
#include "tensor.h"
#include "core.h"
#include <iostream>
#include <string>
#include <dlib/rand.h>
#include <dlib/string.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class con_
{
public:
con_()
{}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
// TODO
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
// TODO
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// TODO
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
resizable_tensor params;
};
template <typename SUB_NET>
using con = add_layer<con_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class fc_
{
public:
fc_() : num_outputs(1)
{
rnd.set_seed("fc_" + cast_to_string(num_outputs));
}
explicit fc_(unsigned long num_outputs_)
{
num_outputs = num_outputs_;
rnd.set_seed("fc_" + cast_to_string(num_outputs));
}
unsigned long get_num_outputs (
) const { return num_outputs; }
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(num_inputs, num_outputs);
std::cout << "fc_::setup() " << params.size() << std::endl;
randomize_parameters(params, num_inputs+num_outputs, rnd);
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
output.set_size(sub.get_output().num_samples(), num_outputs);
output = mat(sub.get_output())*mat(params);
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// forward() computed OUT = INPUT*W where INPUT = mat(sub.get_output()) and
// W = mat(params), so the gradient of dot(OUT, gradient_input) with respect to W
// is trans(INPUT)*gradient_input and with respect to INPUT it is
// gradient_input*trans(W).
// compute the gradient of the parameters.
params_grad += trans(mat(sub.get_output()))*mat(gradient_input);
// compute the gradient for the data
sub.get_gradient_input() += mat(gradient_input)*trans(mat(params));
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
unsigned long num_outputs;
unsigned long num_inputs;
resizable_tensor params;
dlib::rand rnd;
};
template <typename SUB_NET>
using fc = add_layer<fc_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class relu_
{
public:
relu_()
{
}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
output.copy_size(sub.get_output());
output = lowerbound(mat(sub.get_output()), 0);
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
// The derivative of relu(x) is 1 for x > 0 and 0 otherwise, so the incoming
// gradient is passed through only where this layer's input was positive.
const float* grad = gradient_input.host();
const float* in = sub.get_output().host();
float* out = sub.get_gradient_input().host();
for (unsigned long i = 0; i < sub.get_output().size(); ++i)
{
if (in[i] > 0)
out[i] += grad[i];
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
resizable_tensor params;
};
template <typename SUB_NET>
using relu = add_layer<relu_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class multiply_
{
public:
multiply_()
{
}
template <typename SUB_NET>
void setup (const SUB_NET& sub)
{
num_inputs = sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k();
params.set_size(1, num_inputs);
std::cout << "multiply_::setup() " << params.size() << std::endl;
const int num_outputs = num_inputs;
randomize_parameters(params, num_inputs+num_outputs, rnd);
}
template <typename SUB_NET>
void forward(const SUB_NET& sub, resizable_tensor& output)
{
DLIB_CASSERT( sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k() == params.size(), "");
DLIB_CASSERT( sub.get_output().nr()*sub.get_output().nc()*sub.get_output().k() == num_inputs, "");
output.copy_size(sub.get_output());
auto indata = sub.get_output().host();
auto outdata = output.host();
auto paramdata = params.host();
for (int i = 0; i < sub.get_output().num_samples(); ++i)
{
for (int j = 0; j < num_inputs; ++j)
{
*outdata++ = *indata++ * paramdata[j];
}
}
}
template <typename SUB_NET>
void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad)
{
params_grad += sum_rows(pointwise_multiply(mat(sub.get_output()),mat(gradient_input)));
for (long i = 0; i < gradient_input.num_samples(); ++i)
{
sub.get_gradient_input().add_to_sample(i,
pointwise_multiply(rowm(mat(gradient_input),i), mat(params)));
}
}
const tensor& get_layer_params() const { return params; }
tensor& get_layer_params() { return params; }
private:
int num_inputs;
resizable_tensor params;
dlib::rand rnd;
};
template <typename SUB_NET>
using multiply = add_layer<multiply_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_LAYERS_H_
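To spell out the gradient algebra in fc_::backward(): forward() computes OUT = INPUT*W, where INPUT = mat(sub.get_output()) and W = mat(params), so the gradient of dot(OUT, gradient_input) with respect to W is trans(INPUT)*gradient_input and with respect to INPUT it is gradient_input*trans(W). The fragment below restates that on plain dlib matrices; it is an illustration written for this note, not code from the commit.

// Illustration only; the function name fc_gradient_sketch is made up.
#include <dlib/matrix.h>
using namespace dlib;

void fc_gradient_sketch()
{
    matrix<float> INPUT = matrix_cast<float>(randm(2,3));  // 2 samples, 3 inputs each
    matrix<float> W     = matrix_cast<float>(randm(3,4));  // fc_ parameters, 4 outputs
    matrix<float> OUT   = INPUT*W;                         // what fc_::forward() computes

    matrix<float> G = matrix_cast<float>(randm(2,4));      // stands in for gradient_input

    matrix<float> dW     = trans(INPUT)*G;                 // what backward() adds to params_grad
    matrix<float> dINPUT = G*trans(W);                     // what it adds to sub.get_gradient_input()
}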
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#undef DLIB_DNn_LAYERS_ABSTRACT_H_
#ifdef DLIB_DNn_LAYERS_ABSTRACT_H_
#include "tensor_abstract.h"
#include "core_abstract.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class SUB_NET
{
/*!
WHAT THIS OBJECT REPRESENTS
By "Sub net" we mean the part of the network closer to the input. Whenever
you get a SUB_NET it will always have computed its outputs and they will be
available in get_output().
!*/
public:
const tensor& get_output(
) const;
tensor& get_gradient_input(
);
const NEXT_SUB_NET& sub_net(
) const;
NEXT_SUB_NET& sub_net(
);
};
// ----------------------------------------------------------------------------------------
class EXAMPLE_LAYER_
{
/*!
WHAT THIS OBJECT REPRESENTS
Each layer in a deep neural network can be thought of as a function,
f(data,parameters), that takes in a data tensor, some parameters, and
produces an output tensor. You create an entire deep network by composing
these functions. Importantly, you are able to use a wide range of
different functions to accommodate whatever task you are trying to accomplish.
Dlib includes a number of common layer types but if you want to define your
own then you simply implement a class with the same interface as EXAMPLE_LAYER_.
!*/
public:
EXAMPLE_LAYER_(
);
/*!
ensures
- Default constructs this object. This function is not required to do
anything in particular but it is required that layer objects be default
constructable.
!*/
template <typename SUB_NET>
void setup (
const SUB_NET& sub
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
ensures
- performs any necessary initial memory allocations and/or sets parameters
to their initial values prior to learning. Therefore, calling setup
destroys any previously learned parameters.
!*/
template <typename SUB_NET>
void forward(
const SUB_NET& sub,
resizable_tensor& output
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
- setup() has been called.
ensures
- Runs the output of the sub-network through this layer and stores the
output into #output. In particular, forward() can use any of the outputs
in sub (e.g. sub.get_output(), sub.sub_net().get_output(), etc.) to
compute whatever it wants.
- #output.num_samples() == sub.get_output().num_samples()
!*/
template <typename SUB_NET>
void backward(
const tensor& gradient_input,
SUB_NET& sub,
tensor& params_grad
);
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of this file.
- setup() has been called.
- gradient_input has the same dimensions as the output of forward(sub,output).
- have_same_dimensions(sub.get_gradient_input(), sub.get_output()) == true
- have_same_dimensions(params_grad, get_layer_params()) == true
ensures
- This function outputs the gradients of this layer with respect to the
input data from sub and also with respect to this layer's parameters.
These gradients are stored into #sub and #params_grad, respectively. To be
precise, the gradients are taken of a function f(sub,get_layer_params())
which is defined thusly:
- let OUT be the output of forward(sub,OUT).
- let f(sub,get_layer_params()) == dot(OUT, gradient_input)
Then we define the following gradient vectors:
- PARAMETER_GRADIENT == gradient of f(sub,get_layer_params()) with
respect to get_layer_params().
- for all valid I:
- DATA_GRADIENT_I == gradient of f(sub,get_layer_params()) with
respect to layer<I>(sub).get_output() (recall that forward() can
draw inputs from the immediate sub layer, sub.sub_net(), or
any earlier layer. So you must consider the gradients with
respect to all inputs drawn from sub)
Finally, backward() adds these gradients into the output by performing:
- params_grad += PARAMETER_GRADIENT
- for all valid I:
- layer<I>(sub).get_gradient_input() += DATA_GRADIENT_I
!*/
const tensor& get_layer_params(
) const;
/*!
ensures
- returns the parameters that define the behavior of forward().
!*/
tensor& get_layer_params(
);
/*!
ensures
- returns the parameters that define the behavior of forward().
!*/
};
// For each layer you define, always define an add_layer template so that layers can be
// easily composed. Moreover, the convention is that the layer class ends with an _
// while the add_layer template has the same name but without the trailing _.
template <typename SUB_NET>
using EXAMPLE_LAYER = add_layer<EXAMPLE_LAYER_, SUB_NET>;
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
class fc_
{
/*!
WHAT THIS OBJECT REPRESENTS
This is an implementation of the EXAMPLE_LAYER_ interface defined above.
In particular, it defines a fully connected layer that takes an input
tensor and multiplies it by a weight matrix and outputs the results.
!*/
public:
fc_(
);
/*!
ensures
- #get_num_outputs() == 1
!*/
explicit fc_(
unsigned long num_outputs
);
/*!
ensures
- #get_num_outputs() == num_outputs
!*/
unsigned long get_num_outputs (
) const;
/*!
ensures
- This layer outputs column vectors that contain get_num_outputs()
elements. That is, the output tensor T from forward() will be such that:
- T.num_samples() == however many samples were given to forward().
- T.nr() == get_num_outputs()
- The rest of the dimensions of T will be 1.
!*/
template <typename SUB_NET> void setup (const SUB_NET& sub);
template <typename SUB_NET> void forward(const SUB_NET& sub, resizable_tensor& output);
template <typename SUB_NET> void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
template <typename SUB_NET>
using fc = add_layer<fc_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class relu_
{
public:
relu_(
);
template <typename SUB_NET> void setup (const SUB_NET& sub);
template <typename SUB_NET> void forward(const SUB_NET& sub, resizable_tensor& output);
template <typename SUB_NET> void backward(const tensor& gradient_input, SUB_NET& sub, tensor& params_grad);
const tensor& get_layer_params() const;
tensor& get_layer_params();
/*!
These functions are implemented as described in the EXAMPLE_LAYER_ interface.
!*/
};
template <typename SUB_NET>
using relu = add_layer<relu_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_LAYERS_ABSTRACT_H_
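As a minimal concrete instance of the EXAMPLE_LAYER_ contract, the sketch below defines a parameterless layer that scales its input by a constant. It follows the same pattern as the relu_ layer; the class name is invented for this example and is not part of the commit.

// Hypothetical layer, written for illustration only.
#include <dlib/dnn.h>
using namespace dlib;

class scale_by_two_
{
public:
    scale_by_two_() {}

    template <typename SUB_NET>
    void setup (const SUB_NET& /*sub*/)
    {
        // nothing to allocate; this layer has no learnable parameters
    }

    template <typename SUB_NET>
    void forward(const SUB_NET& sub, resizable_tensor& output)
    {
        output.copy_size(sub.get_output());
        output = 2.0f*mat(sub.get_output());
    }

    template <typename SUB_NET>
    void backward(const tensor& gradient_input, SUB_NET& sub, tensor& /*params_grad*/)
    {
        // The layer computes 2*x, whose derivative is 2, so backward() just scales
        // the incoming gradient and adds it in, as the contract above requires.
        sub.get_gradient_input() += 2.0f*mat(gradient_input);
    }

    const tensor& get_layer_params() const { return params; }
    tensor& get_layer_params() { return params; }
private:
    resizable_tensor params;  // stays empty
};

template <typename SUB_NET>
using scale_by_two = add_layer<scale_by_two_, SUB_NET>;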
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_LOSS_H_
#define DLIB_DNn_LOSS_H_
#include "core.h"
#include <dlib/matrix.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
class loss_binary_hinge_
{
public:
const static unsigned int sample_expansion_factor = 1;
typedef double label_type;
// Implementing to_label() is optional. If you don't do it then it just means the
// automatic operator() mapping from tensors to outputs is missing from the net object.
template <
typename SUB_TYPE,
typename label_iterator
>
void to_label (
const SUB_TYPE& sub,
label_iterator iter
) const
/*!
requires
- SUB_TYPE implements the SUB_NET interface defined at the top of layers_abstract.h.
- sub.get_output().num_samples() must be a multiple of sample_expansion_factor.
- iter == an iterator pointing to the beginning of a range of
sub.get_output().num_samples()/sample_expansion_factor elements. In
particular, they must be label_type elements.
!*/
{
const tensor& output_tensor = sub.get_output();
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
DLIB_CASSERT(output_tensor.num_samples()%sample_expansion_factor == 0,"");
const float* out_data = output_tensor.host();
for (unsigned long i = 0; i < output_tensor.num_samples(); ++i)
{
*iter++ = out_data[i];
}
}
template <
typename label_iterator,
typename SUB_NET
>
double compute_loss (
const tensor& input_tensor,
label_iterator truth, // TODO, this parameter is optional.
SUB_NET& sub
) const
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of layers_abstract.h.
- input_tensor was given as input to the network sub and the outputs are now
visible in sub.get_output(), sub.sub_net().get_output(), etc.
- input_tensor.num_samples() must be a multiple of sample_expansion_factor.
- input_tensor.num_samples() == sub.get_output().num_samples() == grad.num_samples()
- truth == an iterator pointing to the beginning of a range of
input_tensor.num_samples()/sample_expansion_factor elements. In particular,
they must be label_type elements.
- sub.get_gradient_input() has the same dimensions as sub.get_output().
- for all valid i:
- *(truth+i/sample_expansion_factor) is the label of the ith sample in
sub.get_output().
ensures
- #sub.get_gradient_input() == the gradient of the loss with respect to
sub.get_output().
!*/
{
const tensor& output_tensor = sub.get_output();
tensor& grad = sub.get_gradient_input();
// TODO, throw an exception instead of asserting, probably...
DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples(),"");
DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples(),"");
DLIB_CASSERT(output_tensor.nr() == 1 &&
output_tensor.nc() == 1 &&
output_tensor.k() == 1,"");
// The loss we output is the average loss over the mini-batch.
const double scale = 1.0/output_tensor.num_samples();
double loss = 0;
const float* out_data = output_tensor.host();
float* g = grad.host();
for (unsigned long i = 0; i < output_tensor.num_samples(); ++i)
{
const float y = *truth++;
const float temp = 1-y*out_data[i];
if (temp > 0)
{
loss += scale*temp;
g[i] = -scale*y;
}
else
{
g[i] = 0;
}
}
return loss;
}
};
// ----------------------------------------------------------------------------------------
template <typename SUB_NET>
using loss_binary_hinge = add_loss<loss_binary_hinge_, SUB_NET>;
// ----------------------------------------------------------------------------------------
class loss_no_label_
{
public:
//typedef int label_type;
const static unsigned int sample_expansion_factor = 1;
template <
typename SUB_NET
>
double compute_loss (
const tensor& input_tensor,
SUB_NET& sub
) const
/*!
requires
- SUB_NET implements the SUB_NET interface defined at the top of layers_abstract.h.
- input_tensor was given as input to the network sub and the outputs are now
visible in sub.get_output(), sub.sub_net().get_output(), etc.
- input_tensor.num_samples() must be a multiple of sample_expansion_factor.
- input_tensor.num_samples() == sub.get_output().num_samples()
- sub.get_gradient_input() has the same dimensions as sub.get_output().
ensures
- This is a placeholder loss that uses no label data. It always returns a loss
of 0 and leaves #sub.get_gradient_input() unchanged.
!*/
{
return 0;
}
};
// ----------------------------------------------------------------------------------------
template <typename SUB_NET>
using loss_no_label = add_loss<loss_no_label_, SUB_NET>;
// ----------------------------------------------------------------------------------------
}
#endif // #define DLIB_DNn_LOSS_H_
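loss_binary_hinge_ is the standard binary hinge loss: for a label y in {-1,+1} and network output f(x) the per-sample loss is max(0, 1 - y*f(x)), the reported loss is the average over the mini-batch, and the gradient with respect to f(x) is -y/N when the margin is violated and 0 otherwise. The standalone function below restates that arithmetic; it is an illustration for this note, not part of the commit.

// Illustration only; the function name hinge_loss_sketch is made up.
#include <vector>
#include <cstddef>

double hinge_loss_sketch(const std::vector<float>& out, const std::vector<float>& labels,
                         std::vector<float>& grad)
{
    const double scale = 1.0/out.size();   // average over the mini-batch
    double loss = 0;
    grad.assign(out.size(), 0);
    for (std::size_t i = 0; i < out.size(); ++i)
    {
        const float y = labels[i];          // +1 or -1
        const float temp = 1 - y*out[i];    // amount of margin violation
        if (temp > 0)
        {
            loss += scale*temp;             // hinge: max(0, 1 - y*f(x))
            grad[i] = -scale*y;             // gradient of the averaged loss w.r.t. f(x)
        }
    }
    return loss;
}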
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_SOLVERS_H_
#define DLIB_DNn_SOLVERS_H_
#include "tensor.h"
#include <iostream>
namespace dlib
{
/*
class EXAMPLE_SOLVER
{
};
*/
struct sgd
{
matrix<float> v;
float weight_decay;
float eps;
float momentum;
sgd(double eps_ = 0.001)
{
weight_decay = 0.0005;
eps = eps_;
//eps = 0.001;
momentum = 0.9;
}
template <typename layer_type>
void operator() (layer_type& l, const tensor& params_grad)
/*!
requires
- l.get_layer_params().size() != 0
- l.get_layer_params() and params_grad have the same dimensions.
!*/
{
if (v.size() != 0)
v = momentum*v - weight_decay*eps*mat(l.get_layer_params()) - eps*mat(params_grad);
else
v = - weight_decay*eps*mat(l.get_layer_params()) - eps*mat(params_grad);
l.get_layer_params() += v;
}
};
}
#endif // #define DLIB_DNn_SOLVERS_H_
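The sgd solver keeps a momentum vector v per layer and, on every call, applies v = momentum*v - eps*weight_decay*params - eps*params_grad followed by params += v, i.e. plain momentum SGD with L2 weight decay folded into the step. The fragment below traces one such step with plain matrices; it is an illustration for this note, not part of the commit.

// Illustration only; the function name sgd_step_sketch is made up.
#include <dlib/matrix.h>
using namespace dlib;

void sgd_step_sketch()
{
    const float eps = 0.001f, weight_decay = 0.0005f, momentum = 0.9f;

    matrix<float> params      = matrix_cast<float>(randm(3,2));  // stands in for l.get_layer_params()
    matrix<float> params_grad = matrix_cast<float>(randm(3,2));
    matrix<float> v = zeros_matrix<float>(3,2);                  // solver state, starts at zero

    // exactly the update performed by sgd::operator()
    v = momentum*v - weight_decay*eps*params - eps*params_grad;
    params += v;
}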
// Copyright (C) 2015 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_TENSOR_H_
#define DLIB_DNn_TENSOR_H_
#include <memory>
#include <cstring>
#include <dlib/matrix.h>
namespace dlib
{
// ----------------------------------------------------------------------------------------
class gpu_data
{
/*!
CONVENTION
- if (size() != 0) then
- data_host == a pointer to size() floats in CPU memory.
- if (data_device) then
- data_device == a pointer to size() floats in device memory.
- We use the host_current and device_current bools to keep track of which
copy of the data (or both) are most current. e.g. if the CPU has
modified the tensor and it hasn't been copied to the device yet then
host_current==true and device_current == false.
!*/
public:
gpu_data(
) : data_size(0), host_current(true), device_current(false)
{
}
// Not copyable
gpu_data(const gpu_data&) = delete;
gpu_data& operator=(const gpu_data&) = delete;
// but is movable
gpu_data(gpu_data&&) = default;
gpu_data& operator=(gpu_data&&) = default;
void set_size(size_t new_size)
{
if (new_size == 0)
{
data_size = 0;
host_current = true;
device_current = false;
data_host.reset();
data_device.reset();
}
else if (new_size != data_size)
{
data_size = new_size;
host_current = true;
device_current = false;
data_host.reset(new float[new_size]);
data_device.reset();
}
}
void async_copy_to_device()
{
// TODO
}
void async_copy_to_host()
{
// TODO
}
const float* host() const
{
copy_to_host();
return data_host.get();
}
float* host()
{
copy_to_host();
device_current = false;
return data_host.get();
}
const float* device() const
{
copy_to_device();
return data_device.get();
}
float* device()
{
copy_to_device();
host_current = false;
return data_device.get();
}
size_t size() const { return data_size; }
private:
void copy_to_device() const
{
if (!device_current)
{
// TODO, cudamemcpy()
device_current = true;
}
}
void copy_to_host() const
{
if (!host_current)
{
// TODO, cudamemcpy()
host_current = true;
}
}
size_t data_size;
mutable bool host_current;
mutable bool device_current;
std::unique_ptr<float[]> data_host;
std::unique_ptr<float[]> data_device;
};
// ----------------------------------------------------------------------------------------
class tensor
{
/*!
WHAT THIS OBJECT REPRESENTS
This object represents a 4-dimensional array of float values. The first
dimension, num_samples(), indexes the samples in a mini-batch while nr(),
nc(), and k() give the size of each sample. The underlying memory is held
in a gpu_data object, so the same tensor can be accessed from either the
host or the device.
!*/
public:
tensor (
) :
m_n(0), m_nr(0), m_nc(0), m_k(0)
{
}
inline virtual ~tensor() = 0;
long num_samples() const { return m_n; }
long nr() const { return m_nr; }
long nc() const { return m_nc; }
long k() const { return m_k; }
size_t size() const { return data.size(); }
void async_copy_to_host()
{
data.async_copy_to_host();
}
void async_copy_to_device()
{
data.async_copy_to_device();
}
/*!
ensures
- begin asynchronously copying this tensor to the GPU.
NOTE that the "get device pointer" routine in this class
will have to do some kind of synchronization that ensures
the copy is finished.
!*/
const float* host() const { return data.host(); }
float* host() { return data.host(); }
const float* device() const { return data.device(); }
float* device() { return data.device(); }
tensor& operator= (float val)
{
// TODO, do on the device if that's where the memory is living right now.
auto d = data.host();
for (size_t i = 0; i < data.size(); ++i)
d[i] = val;
return *this;
}
template <typename EXP>
tensor& operator= (const matrix_exp<EXP>& item)
{
DLIB_CASSERT(num_samples() == item.nr() &&
nr()*nc()*k() == item.nc(),"");
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(data.host(), m_n, m_nr*m_nc*m_k) = item;
return *this;
}
template <typename EXP>
tensor& operator+= (const matrix_exp<EXP>& item)
{
DLIB_CASSERT(num_samples() == item.nr() &&
nr()*nc()*k() == item.nc(),"");
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(data.host(), m_n, m_nr*m_nc*m_k) += item;
return *this;
}
template <typename EXP>
tensor& operator-= (const matrix_exp<EXP>& item)
{
DLIB_CASSERT(num_samples() == item.nr() &&
nr()*nc()*k() == item.nc(),"");
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(data.host(), m_n, m_nr*m_nc*m_k) -= item;
return *this;
}
template <typename EXP>
void set_sample (
unsigned long idx,
const matrix_exp<EXP>& item
)
{
DLIB_CASSERT(idx < num_samples(), "");
DLIB_CASSERT(item.size() == nr()*nc()*k(), "");
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(data.host()+idx*item.size(), item.nr(), item.nc()) = item;
}
template <typename EXP>
void add_to_sample (
unsigned long idx,
const matrix_exp<EXP>& item
)
{
DLIB_CASSERT(idx < num_samples(), "");
DLIB_CASSERT(item.size() == nr()*nc()*k(), "");
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(data.host()+idx*item.size(), item.nr(), item.nc()) += item;
}
protected:
tensor& operator= (const tensor& item)
{
m_n = item.m_n;
m_nr = item.m_nr;
m_nc = item.m_nc;
m_k = item.m_k;
data.set_size(item.data.size());
std::memcpy(data.host(), item.data.host(), data.size()*sizeof(float));
return *this;
}
tensor(
const tensor& item
)
{
*this = item;
}
tensor(tensor&& item) = default;
tensor& operator=(tensor&& item) = default;
long m_n;
long m_nr;
long m_nc;
long m_k;
gpu_data data;
};
tensor::~tensor()
{
}
// ----------------------------------------------------------------------------------------
inline const matrix_op<op_pointer_to_mat<float> > mat (
const tensor& t,
long nr,
long nc
)
{
DLIB_ASSERT(nr > 0 && nc > 0 ,
"\tconst matrix_exp mat(tensor, nr, nc)"
<< "\n\t nr and nc must be bigger than 0"
<< "\n\t nr: " << nr
<< "\n\t nc: " << nc
);
DLIB_ASSERT(nr*nc == t.size() ,
"\tconst matrix_exp mat(tensor, nr, nc)"
<< "\n\t The sizes don't match up."
<< "\n\t nr*nc: " << nr*nc
<< "\n\t t.size(): " << t.size()
);
typedef op_pointer_to_mat<float> op;
return matrix_op<op>(op(t.host(),nr,nc));
}
inline const matrix_op<op_pointer_to_mat<float> > mat (
const tensor& t
)
{
DLIB_ASSERT(t.size() != 0,
"\tconst matrix_exp mat(tensor)"
<< "\n\t The tensor can't be empty."
);
return mat(t, t.num_samples(), t.size()/t.num_samples());
}
// ----------------------------------------------------------------------------------------
inline bool have_same_dimensions (
const tensor& a,
const tensor& b
)
{
return a.num_samples() == b.num_samples() &&
a.nr() == b.nr() &&
a.nc() == b.nc() &&
a.k() == b.k();
}
// ----------------------------------------------------------------------------------------
class resizable_tensor : public tensor
{
public:
resizable_tensor(
)
{}
explicit resizable_tensor(
long n_, long nr_ = 1, long nc_ = 1, long k_ = 1
)
{
set_size(n_,nr_,nc_,k_);
}
resizable_tensor(const resizable_tensor&) = default;
resizable_tensor(resizable_tensor&&) = default;
void clear(
)
{
set_size(0,0,0,0);
}
void copy_size (
const tensor& item
)
/*!
ensures
- resizes *this so that: have_same_dimensions(#*this, item)==true
!*/
{
set_size(item.num_samples(), item.nr(), item.nc(), item.k());
}
resizable_tensor& operator= (float val)
{
tensor::operator=(val);
return *this;
}
template <typename EXP>
resizable_tensor& operator= (const matrix_exp<EXP>& item)
{
tensor::operator=(item);
return *this;
}
template <typename EXP>
resizable_tensor& operator+= (const matrix_exp<EXP>& item)
{
tensor::operator+=(item);
return *this;
}
template <typename EXP>
resizable_tensor& operator-= (const matrix_exp<EXP>& item)
{
tensor::operator-=(item);
return *this;
}
template <typename EXP>
void set_sample (
unsigned long idx,
const matrix_exp<EXP>& item
)
{
tensor::set_sample(idx, item);
}
template <typename EXP>
void add_to_sample (
unsigned long idx,
const matrix_exp<EXP>& item
)
{
tensor::add_to_sample(idx, item);
}
resizable_tensor& operator= (const resizable_tensor&) = default;
resizable_tensor& operator= (resizable_tensor&&) = default;
resizable_tensor& operator= (const tensor& x)
{
tensor::operator=(x);
return *this;
}
void set_size(
long n_, long nr_ = 1, long nc_ = 1, long k_ = 1
)
{
m_n = n_;
m_nr = nr_;
m_nc = nc_;
m_k = k_;
data.set_size(m_n*m_nr*m_nc*m_k);
}
};
// ----------------------------------------------------------------------------------------
inline double dot(
const tensor& a,
const tensor& b
)
{
DLIB_CASSERT(a.size() == b.size(), "");
const float* da = a.host();
const float* db = b.host();
double sum = 0;
for (size_t i = 0; i < a.size(); ++i)
sum += da[i]*db[i];
return sum;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_TENSOR_H_
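A brief usage sketch of the tensor interface defined above, written for this note rather than taken from the commit:

// Illustration only; the function name tensor_usage_sketch is made up.
#include <dlib/dnn.h>
using namespace dlib;

void tensor_usage_sketch()
{
    resizable_tensor t(2, 3);    // 2 samples, each with nr()==3, nc()==1, k()==1
    t = 0;                       // fill every element

    matrix<float,0,1> v(3);
    v = 1, 2, 3;                 // dlib's comma initialization syntax

    t.set_sample(0, v);          // copy v into sample 0
    t.add_to_sample(1, 2.0f*v);  // accumulate 2*v into sample 1

    // mat(t) views the tensor as a num_samples() x (nr()*nc()*k()) matrix and
    // dot(a,b) sums the elementwise products of two equally sized tensors.
    const double sq_norm = dot(t, t);
    (void)sq_norm;
}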