Commit 40c3e488 authored by Davis King

Simplified more uses of layer visiting and fixed constness bug

The const bug was introduced yesterday and caused some layer visiting to
not work on const networks.
parent 5ec60a91
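In user-facing terms, the fix means the visiting functions now work when given a const network. A minimal sketch of that usage (the toy network below is illustrative, not part of this commit):

#include <dlib/dnn.h>
#include <iostream>

using namespace dlib;

// A small stand-in network, just for illustration.
using net_type = loss_multiclass_log<fc<10, relu<fc<32, input<matrix<float>>>>>>;

int main()
{
    net_type net;
    const net_type& cnet = net;

    // Before this fix, visiting through a const reference could silently do nothing.
    int num_relus = 0;
    visit_computational_layers(cnet, [&](const relu_&) { ++num_relus; });
    std::cout << "relu layers: " << num_relus << '\n';  // prints 1
}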
@@ -3680,13 +3680,21 @@ namespace dlib
         public:
             explicit visitor_computational_layer(visitor& v) : v_(v) {}
 
-            template <typename T, typename U, typename E>
-            void operator()(size_t idx, add_layer<T,U,E>& l) const
+            template <typename layer>
+            void do_visit(size_t idx, layer& l) const
             {
                 // Call whatever version of the visitor the user provided.
                 call_if_valid(v_, idx, l.layer_details());
                 call_if_valid(v_, l.layer_details());
             }
 
+            // const case
+            template <typename T, typename U, typename E>
+            void operator()(size_t idx, const add_layer<T,U,E>& l) const { do_visit(idx, l); }
+            // non-const case
+            template <typename T, typename U, typename E>
+            void operator()(size_t idx, add_layer<T,U,E>& l) const { do_visit(idx, l); }
+
         private:
             visitor& v_;
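The pattern above leans on call_if_valid(), which invokes the visitor only when it is callable with the supplied arguments; that is what lets users pass a visitor taking either (idx, layer_details) or just (layer_details). A self-contained sketch of that dispatch idea (the name call_if_valid_sketch and this implementation are illustrative, not dlib's actual code):

#include <type_traits>
#include <utility>

// Call f(args...) if that expression is well formed; report whether it ran.
template <typename F, typename... Args>
std::enable_if_t<std::is_invocable_v<F, Args...>, bool>
call_if_valid_sketch(F&& f, Args&&... args)
{
    std::forward<F>(f)(std::forward<Args>(args)...);
    return true;
}

// Fallback when the signature doesn't match: silently do nothing.
template <typename F, typename... Args>
std::enable_if_t<!std::is_invocable_v<F, Args...>, bool>
call_if_valid_sketch(F&&, Args&&...)
{
    return false;
}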
@@ -3771,8 +3779,8 @@ namespace dlib
         public:
             explicit visit_layer_parameter_gradients(visitor& v) : v_(v) {}
 
-            template <typename T, typename U, typename E>
-            void operator()(add_layer<T,U,E>& l)
+            template <typename layer>
+            void do_visit(layer& l)
             {
                 // Call whatever version of the visitor the user provided.
                 const bool visitor_called = call_if_valid(v_, computational_layer_idx, l.get_parameter_gradient()) ||
@@ -3780,6 +3788,14 @@ namespace dlib
                 DLIB_CASSERT(visitor_called, "A visitor function with an incorrect signature was given to visit_layer_parameter_gradients()");
                 ++computational_layer_idx;
             }
 
+            // const version
+            template <typename T, typename U, typename E>
+            void operator()(const add_layer<T,U,E>& l) { do_visit(l); }
+            // non-const version
+            template <typename T, typename U, typename E>
+            void operator()(add_layer<T,U,E>& l) { do_visit(l); }
+
         private:
             size_t computational_layer_idx = 0;
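For context, a typical use of visit_layer_parameter_gradients() after a forward/backward pass is to reduce over all parameter gradients. A sketch (the helper name is hypothetical; sum(), squared(), and mat() are dlib matrix utilities):

#include <dlib/dnn.h>

// Sum of squared parameter-gradient entries across the whole network.
template <typename net_type>
double gradient_sumsq(net_type& net)  // non-const: gradients are mutable here
{
    double sumsq = 0;
    dlib::visit_layer_parameter_gradients(net, [&](size_t, dlib::tensor& g) {
        sumsq += dlib::sum(dlib::squared(dlib::mat(g)));
    });
    return sumsq;
}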
@@ -273,31 +273,13 @@ namespace dlib
 // ----------------------------------------------------------------------------------------
 
-    namespace impl
-    {
-        class visitor_count_parameters
-        {
-        public:
-            visitor_count_parameters(size_t& num_parameters_) : num_parameters(num_parameters_) {}
-
-            void operator()(size_t, const tensor& t)
-            {
-                num_parameters += t.size();
-            }
-
-        private:
-            size_t& num_parameters;
-        };
-    }
-
     template <typename net_type>
-    inline size_t count_parameters(
+    size_t count_parameters(
         const net_type& net
     )
     {
         size_t num_parameters = 0;
-        impl::visitor_count_parameters temp(num_parameters);
-        visit_layer_parameters(net, temp);
+        visit_layer_parameters(net, [&](const tensor& t) { num_parameters += t.size(); });
         return num_parameters;
     }
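The same lambda pattern generalizes to other reductions over the parameters, and after the constness fix it also works through a const reference. For example, a global parameter L2 norm (hypothetical helper, written as a sketch):

#include <dlib/dnn.h>
#include <cmath>

template <typename net_type>
double parameter_l2_norm(const net_type& net)
{
    double sumsq = 0;
    dlib::visit_layer_parameters(net, [&](const dlib::tensor& t) {
        sumsq += dlib::sum(dlib::squared(dlib::mat(t)));
    });
    return std::sqrt(sumsq);
}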
@@ -311,16 +293,10 @@ namespace dlib
             visitor_learning_rate_multiplier(double new_learning_rate_multiplier_) :
                 new_learning_rate_multiplier(new_learning_rate_multiplier_) {}
 
-            template <typename input_layer_type>
-            void operator()(size_t , input_layer_type& ) const
-            {
-                // ignore other layers
-            }
-
-            template <typename T, typename U, typename E>
-            void operator()(size_t , add_layer<T,U,E>& l) const
+            template <typename layer>
+            void operator()(layer& l) const
             {
-                set_learning_rate_multiplier(l.layer_details(), new_learning_rate_multiplier);
+                set_learning_rate_multiplier(l, new_learning_rate_multiplier);
             }
 
         private:
@@ -337,7 +313,7 @@ namespace dlib
     {
         DLIB_CASSERT(learning_rate_multiplier >= 0);
         impl::visitor_learning_rate_multiplier temp(learning_rate_multiplier);
-        visit_layers(net, temp);
+        visit_computational_layers(net, temp);
     }
 
     template <size_t begin, size_t end, typename net_type>
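A common use of set_all_learning_rate_multipliers() together with the ranged overload whose template header appears just above (set_learning_rate_multipliers_range in dlib) is fine-tuning: freeze everything, then re-enable training for a few layers. A sketch with illustrative layer indices:

#include <dlib/dnn.h>

template <typename net_type>
void freeze_all_but_head(net_type& net)
{
    // A multiplier of 0 makes the solvers leave those parameters untouched.
    dlib::set_all_learning_rate_multipliers(net, 0);
    // Re-enable training for layers [0,3) only (the range is illustrative).
    dlib::set_learning_rate_multipliers_range<0, 3>(net, 1);
}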
@@ -2000,31 +2000,69 @@ namespace
         >>>>>>>>>>>>;
         net_type2 pnet;
+        const net_type2& const_pnet = pnet;
         DLIB_TEST_MSG(pnet.num_layers == 132, pnet.num_layers);
         DLIB_TEST_MSG(pnet.num_computational_layers == 110, pnet.num_computational_layers);
 
-        std::vector<bool> hit(pnet.num_computational_layers, false);
-        size_t count = 0;
-        visit_layer_parameter_gradients(pnet, [&](size_t i, tensor& ){hit[i] = true; ++count; });
-        for (auto x : hit)
-            DLIB_TEST(x);
-        DLIB_TEST(count == pnet.num_computational_layers);
+        {
+            std::vector<bool> hit(pnet.num_computational_layers, false);
+            size_t count = 0;
+            visit_layer_parameter_gradients(pnet, [&](size_t i, tensor& ){hit[i] = true; ++count; });
+            for (auto x : hit)
+                DLIB_TEST(x);
+            DLIB_TEST(count == pnet.num_computational_layers);
+        }
+        {
+            std::vector<bool> hit(pnet.num_computational_layers, false);
+            size_t count = 0;
+            visit_layer_parameter_gradients(const_pnet, [&](size_t i, const tensor& ){hit[i] = true; ++count; });
+            for (auto x : hit)
+                DLIB_TEST(x);
+            DLIB_TEST(count == pnet.num_computational_layers);
+        }
 
-        count = 0;
-        std::vector<bool> hit2(pnet.num_computational_layers, false);
-        visit_layer_parameters(pnet, [&](size_t i, tensor& ){hit2[i] = true; ++count; });
-        for (auto x : hit2)
-            DLIB_TEST(x);
-        DLIB_TEST(count == pnet.num_computational_layers);
+        {
+            size_t count = 0;
+            std::vector<bool> hit2(pnet.num_computational_layers, false);
+            visit_layer_parameters(pnet, [&](size_t i, tensor& ){hit2[i] = true; ++count; });
+            for (auto x : hit2)
+                DLIB_TEST(x);
+            DLIB_TEST(count == pnet.num_computational_layers);
+        }
+        {
+            size_t count = 0;
+            std::vector<bool> hit2(pnet.num_computational_layers, false);
+            visit_layer_parameters(const_pnet, [&](size_t i, const tensor& ){hit2[i] = true; ++count; });
+            for (auto x : hit2)
+                DLIB_TEST(x);
+            DLIB_TEST(count == pnet.num_computational_layers);
+        }
+
+        int num_relus = 0;
+        visit_computational_layers(pnet, [&num_relus](relu_&) { ++num_relus; });
+        DLIB_TEST(num_relus == 10);
+
+        num_relus = 0;
+        visit_computational_layers(const_pnet, [&num_relus](const relu_&) { ++num_relus; });
+        DLIB_TEST(num_relus == 10);
+
+        num_relus = 0;
+        visit_computational_layers(const_pnet, [&num_relus](relu_&) { ++num_relus; });
+        // Visiting doesn't happen in this case because a const network can't bind the non-const
+        // relu_ reference used above.
+        DLIB_TEST(num_relus == 0);
+
+        DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.01f);
+        visit_computational_layers(pnet, [](leaky_relu_& l) { l = leaky_relu_(0.001f); });
+        DLIB_TEST(layer<leaky_relu>(pnet).layer_details().get_alpha() == 0.001f);
+
+        // Make sure count_parameters() works, since it depends on visiting too.  Initially the
+        // network has 0 parameters, but once we run something through it, it will allocate its
+        // parameters.
+        DLIB_TEST_MSG(count_parameters(pnet) == 0, "count_parameters(pnet): "<< count_parameters(pnet));
+        const matrix<unsigned char> input = zeros_matrix<unsigned char>(40,40);
+        pnet(input);
+        DLIB_TEST_MSG(count_parameters(pnet) == 17606, "count_parameters(pnet): "<< count_parameters(pnet));
     }
 
     float tensor_read_cpu(const tensor& t, long i, long k, long r, long c)
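The zero-visits case above follows directly from reference binding rules: a const network hands the visitor a const relu_&, a lambda parameter of type relu_& cannot bind to that, and the call_if_valid() dispatch therefore skips the call. A minimal stand-alone illustration (relu_ here is a stand-in type):

#include <type_traits>

struct relu_ {};  // stand-in for dlib's relu_ layer details type

// A visitor taking const relu_& is callable with a const layer...
static_assert(std::is_invocable_v<void(*)(const relu_&), const relu_&>, "");
// ...but one taking relu_& is not, so a const network never invokes it.
static_assert(!std::is_invocable_v<void(*)(relu_&), const relu_&>, "");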