"vscode:/vscode.git/clone" did not exist on "067e36b505ec229b684a0c6daa7960a0ed7b0fca"
Unverified Commit e7ec6b77 authored by Adrià Arrufat's avatar Adrià Arrufat Committed by GitHub
Browse files

Add visitor to remove bias from bn_ layer inputs (closes #2155) (#2156)

* add visitor to remove bias from bn_ inputs (closes #2155)

* remove unused parameter and make documentation more clear

* remove bias from bn_ layers too and use better name

* let the batch norm keep their bias, use even better name

* be more consistent with impl naming

* remove default constructor

* do not use method to prevent some errors

* add disable bias method to pertinent layers

* update dcgan example

- grammar
- print number of network parameters to be able to check bias is not allocated
- at the end, give feedback to the user about what the discriminator thinks about each generated sample

* fix fc_ logic

* add documentation

* add bias_is_disabled methods and update to_xml

* print use_bias=false when bias is disabled
parent ed22f040
......@@ -183,6 +183,28 @@ namespace dlib
impl::set_bias_weight_decay_multiplier(obj, special_(), bias_weight_decay_multiplier);
}
// ----------------------------------------------------------------------------------------
// Generic disable_bias(): calls obj.disable_bias() when T provides such a
// member function and does nothing otherwise.  Dispatch follows the same
// special_/general_ tag pattern used by set_bias_weight_decay_multiplier above.
namespace impl
{
// This overload participates in overload resolution only when
// &T::disable_bias is well-formed (expression SFINAE), i.e. only when T
// actually has a disable_bias() member function.
template <typename T, typename int_<decltype(&T::disable_bias)>::type = 0>
void disable_bias(
T& obj,
special_
) { obj.disable_bias(); }
// Fallback for layer types without a disable_bias() member: a no-op.
template <typename T>
void disable_bias( const T& , general_) { }
}
// Public entry point.  Passing special_() prefers the SFINAE overload when it
// is viable (special_ is presumably convertible to general_, as in the other
// dlib layer-trait helpers — the general_ overload is the fallback).
template <typename T>
void disable_bias(
T& obj
)
{
impl::disable_bias(obj, special_());
}
// ----------------------------------------------------------------------------------------
namespace impl
......
......@@ -157,6 +157,20 @@ namespace dlib
- does nothing
!*/
// ----------------------------------------------------------------------------------------
// Best-effort bias disabling: forwards to obj.disable_bias() when the layer
// type supports it, and is a harmless no-op otherwise.  This lets generic
// code (e.g. network visitors) call it on any layer type uniformly.
template <typename T>
void disable_bias(
T& obj
);
/*!
ensures
- if (obj has a disable_bias() member function) then
- calls obj.disable_bias()
- else
- does nothing
!*/
// ----------------------------------------------------------------------------------------
bool dnn_prefer_fastest_algorithms(
......
This diff is collapsed.
......@@ -573,6 +573,22 @@ namespace dlib
- #get_bias_weight_decay_multiplier() == val
!*/
void disable_bias(
);
/*!
ensures
- #bias_is_disabled() == true
!*/
bool bias_is_disabled(
) const;
/*!
ensures
- returns true if bias learning is disabled for this layer. This means the biases will
not be learned during training and they will not be used in the forward or backward
methods either.
!*/
alias_tensor_const_instance get_weights(
) const;
/*!
......@@ -903,6 +919,22 @@ namespace dlib
- #get_bias_weight_decay_multiplier() == val
!*/
void disable_bias(
);
/*!
ensures
- #bias_is_disabled() == true
!*/
bool bias_is_disabled(
) const;
/*!
ensures
- returns true if bias learning is disabled for this layer. This means the biases will
not be learned during training and they will not be used in the forward or backward
methods either.
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
......@@ -1147,6 +1179,22 @@ namespace dlib
- #get_bias_weight_decay_multiplier() == val
!*/
void disable_bias(
);
/*!
ensures
- #bias_is_disabled() == true
!*/
bool bias_is_disabled(
) const;
/*!
ensures
- returns true if bias learning is disabled for this layer. This means the biases will
not be learned during training and they will not be used in the forward or backward
methods either.
!*/
template <typename SUBNET> void setup (const SUBNET& sub);
template <typename SUBNET> void forward(const SUBNET& sub, resizable_tensor& output);
template <typename SUBNET> void backward(const tensor& gradient_input, SUBNET& sub, tensor& params_grad);
......@@ -1616,6 +1664,22 @@ namespace dlib
new_window_size.
!*/
// ----------------------------------------------------------------------------------------
// Disables the (redundant) bias term of every layer that feeds directly into
// a bn_ layer.  Batch normalization layers keep their own bias parameters
// (see the commit notes), so a bias in the preceding layer adds parameters
// without adding expressive power.
template <typename net_type>
void set_all_bn_inputs_no_bias (
const net_type& net
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
ensures
- Disables bias for all bn_ layer inputs.
- Sets get_bias_learning_rate_multiplier() and get_bias_weight_decay_multiplier()
to zero for all bn_ layer inputs.
!*/
// ----------------------------------------------------------------------------------------
class affine_
......
......@@ -10,7 +10,7 @@
by Alec Radford, Luke Metz, Soumith Chintala.
The main idea is that there are two neural networks training at the same time:
- the generator is in charge of generating images that look as close as possible as the
- the generator is in charge of generating images that look as close as possible to the
ones from the dataset.
- the discriminator will decide whether an image is fake (created by the generator) or real
(selected from the dataset).
......@@ -35,25 +35,6 @@
using namespace std;
using namespace dlib;
// We start by defining a simple visitor to disable bias learning in a network. By default,
// biases are initialized to 0, so setting the multipliers to 0 disables bias learning.
class visitor_no_bias
{
public:
template <typename input_layer_type>
void operator()(size_t , input_layer_type& ) const
{
// ignore other layers
}
template <typename T, typename U, typename E>
void operator()(size_t , add_layer<T, U, E>& l) const
{
set_bias_learning_rate_multiplier(l.layer_details(), 0);
set_bias_weight_decay_multiplier(l.layer_details(), 0);
}
};
// Some helper definitions for the noise generation
const size_t noise_size = 100;
using noise_t = std::array<matrix<float, 1, 1>, noise_size>;
......@@ -149,16 +130,15 @@ int main(int argc, char** argv) try
// Instantiate both generator and discriminator
generator_type generator;
discriminator_type discriminator(
leaky_relu_(0.2), leaky_relu_(0.2), leaky_relu_(0.2));
// Remove the bias learning from the networks
visit_layers(generator, visitor_no_bias());
visit_layers(discriminator, visitor_no_bias());
discriminator_type discriminator(leaky_relu_(0.2), leaky_relu_(0.2), leaky_relu_(0.2));
// Remove the bias learning from all bn_ inputs in both networks
set_all_bn_inputs_no_bias(generator);
set_all_bn_inputs_no_bias(discriminator);
// Forward random noise so that we see the tensor size at each layer
discriminator(generate_image(generator, make_noise(rnd)));
cout << "generator" << endl;
cout << "generator (" << count_parameters(generator) << " parameters)" << endl;
cout << generator << endl;
cout << "discriminator" << endl;
cout << "discriminator (" << count_parameters(discriminator) << " parameters)" << endl;
cout << discriminator << endl;
// The solvers for the generator and discriminator networks. In this example, we are going to
......@@ -257,8 +237,11 @@ int main(int argc, char** argv) try
// output.
while (!win.is_closed())
{
win.set_image(generate_image(generator, make_noise(rnd)));
cout << "Hit enter to generate a new image";
const auto image = generate_image(generator, make_noise(rnd));
const auto real = discriminator(image) > 0;
win.set_image(image);
cout << "The discriminator thinks it's " << (real ? "real" : "fake");
cout << ". Hit enter to generate a new image";
cin.get();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment