OpenDAS / dlib · Commits · f1fe908a

Commit f1fe908a, authored Nov 17, 2017 by Davis King
Added loss_dot layer
parent a0220801

Showing 3 changed files with 208 additions and 0 deletions:
    dlib/dnn/loss.h           +105  -0
    dlib/dnn/loss_abstract.h   +62  -0
    dlib/test/dnn.cpp          +41  -0
dlib/dnn/loss.h
@@ -2468,6 +2468,111 @@ namespace dlib
// ----------------------------------------------------------------------------------------
    class loss_dot_
    {
    public:

        typedef matrix<float,0,1> training_label_type;
        typedef matrix<float,0,1> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const
        {
            const tensor& output_tensor = sub.get_output();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            for (long i = 0; i < output_tensor.num_samples(); ++i)
                *iter++ = trans(rowm(mat(output_tensor),i));
        }

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const
        {
            const tensor& output_tensor = sub.get_output();
            tensor& grad = sub.get_gradient_input();

            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
            DLIB_CASSERT(input_tensor.num_samples() != 0);
            DLIB_CASSERT(input_tensor.num_samples()%sub.sample_expansion_factor() == 0);
            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());

            const long network_output_dims = output_tensor.size()/output_tensor.num_samples();

            // The loss we output is the average loss over the mini-batch.
            const double scale = 1.0/output_tensor.num_samples();
            double loss = 0;
            float* g = grad.host();
            const float* out_data = output_tensor.host();
            for (long i = 0; i < output_tensor.num_samples(); ++i)
            {
                DLIB_CASSERT(truth->size() == network_output_dims, "The network must output a vector with the same dimensionality as the training labels. "
                    << "\ntruth->size():       " << truth->size()
                    << "\nnetwork_output_dims: " << network_output_dims);

                const float* t = &(*truth++)(0);
                for (long j = 0; j < network_output_dims; ++j)
                {
                    g[j] = -t[j]*scale;
                    loss -= out_data[j]*t[j];
                }
                g += network_output_dims;
                out_data += network_output_dims;
            }
            return loss*scale;
        }

        friend void serialize(const loss_dot_& , std::ostream& out)
        {
            serialize("loss_dot_", out);
        }

        friend void deserialize(loss_dot_& , std::istream& in)
        {
            std::string version;
            deserialize(version, in);
            if (version != "loss_dot_")
                throw serialization_error("Unexpected version found while deserializing dlib::loss_dot_.");
        }

        friend std::ostream& operator<<(std::ostream& out, const loss_dot_& )
        {
            out << "loss_dot";
            return out;
        }

        friend void to_xml(const loss_dot_& /*item*/, std::ostream& out)
        {
            out << "<loss_dot/>";
        }

    };

    template <typename SUBNET>
    using loss_dot = add_loss_layer<loss_dot_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
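For reference, the quantity computed by the hunk above for a mini-batch of N samples is

    loss = -(1/N) * sum_i dot(output_i, truth_i)

and the gradient written into sub.get_gradient_input() for sample i is the constant vector -truth_i/N, which is exactly what the inner loop over j stores element by element.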
dlib/dnn/loss_abstract.h
@@ -1250,6 +1250,68 @@ namespace dlib
    template <typename SUBNET>
    using loss_mean_squared_per_pixel = add_loss_layer<loss_mean_squared_per_pixel_, SUBNET>;

// ----------------------------------------------------------------------------------------

    class loss_dot_
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object implements the loss layer interface defined above by
                EXAMPLE_LOSS_LAYER_.  In particular, selecting this loss means you want
                to maximize the dot product between the output of a network and a set of
                training vectors.  The loss is therefore the negative dot product.  To be
                very specific, if X is the output vector of a network and Y is a training
                label (also a vector), then the loss for this training sample is: -dot(X,Y)
        !*/
    public:

        typedef matrix<float,0,1> training_label_type;
        typedef matrix<float,0,1> output_label_type;

        template <
            typename SUB_TYPE,
            typename label_iterator
            >
        void to_label (
            const tensor& input_tensor,
            const SUB_TYPE& sub,
            label_iterator iter
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
            it has the additional calling requirements that:
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
            and the output labels are simply the final network outputs stuffed into a
            vector.  To be very specific, the output is the following for all valid i:
                *(iter+i) == trans(rowm(mat(sub.get_output()),i))
        !*/

        template <
            typename const_label_iterator,
            typename SUBNET
            >
        double compute_loss_value_and_gradient (
            const tensor& input_tensor,
            const_label_iterator truth,
            SUBNET& sub
        ) const;
        /*!
            This function has the same interface as EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient()
            except it has the additional calling requirements that:
                - sub.get_output().num_samples() == input_tensor.num_samples()
                - sub.sample_expansion_factor() == 1
                - Let NETWORK_OUTPUT_DIMS == sub.get_output().size()/sub.get_output().num_samples()
                - for all idx such that 0 <= idx < sub.get_output().num_samples():
                    - NETWORK_OUTPUT_DIMS == (*(truth + idx)).size()
        !*/
    };

    template <typename SUBNET>
    using loss_dot = add_loss_layer<loss_dot_, SUBNET>;
// ----------------------------------------------------------------------------------------
}
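As a usage illustration (a sketch, not part of this commit; the layer sizes, data, and trainer settings below are made up for demonstration), a small network can be trained with loss_dot so that its unit-length output lines up with a fixed target direction. All of the calls used here appear in the commit's own test code.

    // Hypothetical usage sketch, not from this commit.
    #include <dlib/dnn.h>
    #include <iostream>
    #include <vector>
    using namespace dlib;

    int main()
    {
        // A linear map to 2-D followed by l2normalize, trained with loss_dot so the
        // unit-length output aligns with the supplied label vectors.
        using net_type = loss_dot<l2normalize<fc_no_bias<2, input<matrix<float,0,1>>>>>;

        // Training data: random 3-D inputs, all labeled with one unit-length target direction.
        const matrix<float,0,1> target = normalize(matrix_cast<float>(randm(2,1)));
        std::vector<matrix<float,0,1>> samples, labels;
        for (int i = 0; i < 64; ++i)
        {
            samples.push_back(matrix_cast<float>(randm(3,1)));
            labels.push_back(target);
        }

        net_type net;
        dnn_trainer<net_type> trainer(net, sgd(1e-4, 0.9));
        trainer.set_learning_rate(0.01);
        trainer.set_min_learning_rate(1e-7);
        trainer.set_mini_batch_size(64);
        trainer.set_max_num_epochs(5000);
        trainer.train(samples, labels);

        // Both net(x) and target are unit vectors, so a dot product near 1 means the
        // network output now points along the target direction.
        std::cout << dot(net(samples[0]), target) << std::endl;
        return 0;
    }

The unit test added in dlib/test/dnn.cpp below performs the same kind of check against a whole family of directions produced by a random projection matrix.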
dlib/test/dnn.cpp
@@ -3009,6 +3009,46 @@ namespace
        dlib::deserialize(net2, in);
    }

// ----------------------------------------------------------------------------------------

    void test_loss_dot()
    {
        print_spinner();

        std::vector<matrix<float,0,1>> samples;
        std::vector<matrix<float,0,1>> labels;

        const matrix<float> proj = matrix_cast<float>(randm(2,3));
        for (int i = 0; i < 128; ++i)
        {
            // The task is going to be to learn the matrix proj.  So we make our
            // training data thusly:
            matrix<float,0,1> x = matrix_cast<float>(randm(3,1));
            matrix<float,0,1> y = normalize(proj*x);
            samples.push_back(x);
            labels.push_back(y);
        }

        using net_type = loss_dot<
            l2normalize<fc_no_bias<2,
            input<matrix<float,0,1>>
            >>>;

        net_type net;
        dnn_trainer<net_type> trainer(net, sgd(1e-4, 0.9));
        trainer.set_learning_rate(0.01);
        trainer.set_min_learning_rate(0.0000001);
        trainer.set_mini_batch_size(128);
        trainer.set_max_num_epochs(50000);
        trainer.train(samples, labels);

        for (size_t i = 0; i < samples.size(); ++i)
        {
            DLIB_TEST(std::abs(1 - dot(net(samples[i]), labels[i])) < 0.001);
        }
    }
// ----------------------------------------------------------------------------------------
    class dnn_tester : public tester
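A remark on the convergence check in test_loss_dot() above: both the network output (through the l2normalize layer) and the training labels (through normalize()) are unit-length, so dot(net(samples[i]), labels[i]) is at most 1 and equals 1 exactly when the two vectors coincide. Asserting std::abs(1 - dot(net(samples[i]), labels[i])) < 0.001 therefore verifies that the learned mapping reproduces the direction of proj*x.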
@@ -3095,6 +3135,7 @@ namespace
            test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
            test_loss_multiclass_per_pixel_weighted();
            test_serialization();
            test_loss_dot();
        }

        void perform_test()