Unverified Commit 12f1b3a3 authored by Adrià Arrufat, committed by GitHub

Refactor visitors into their own header (#2533)

* Refactor visitors into their own header

* Update visitor links in term index

* Move documentation from layers_abstract
parent 736b4931
......@@ -8,7 +8,7 @@
#include "tensor.h"
#include "../geometry/rectangle.h"
#include "../dnn/misc.h"
#include "../dnn/utilities.h"
namespace dlib
{
......
......@@ -6,7 +6,7 @@
#include "tensor.h"
#include "../geometry/rectangle.h"
#include "../dnn/misc.h"
#include "../dnn/utilities.h"
namespace dlib
{
......
......@@ -1838,23 +1838,6 @@ namespace dlib
};
}
template <typename net_type>
void set_all_bn_running_stats_window_sizes (
net_type& net,
unsigned long new_window_size
)
{
visit_layers(net, impl::visitor_bn_running_stats_window_size(new_window_size));
}
template <typename net_type>
void disable_duplicative_biases (
net_type& net
)
{
visit_layers(net, impl::visitor_disable_input_bias());
}
// ----------------------------------------------------------------------------------------
enum fc_bias_mode
......@@ -4428,83 +4411,6 @@ namespace dlib
template <typename SUBNET>
using reorg = add_layer<reorg_<2, 2>, SUBNET>;
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_fuse_layers
{
public:
template <typename T>
void fuse_convolution(T&) const
{
// disable other layer types
}
// handle the standard case (convolutional layer followed by affine)
template <long nf, long nr, long nc, int sy, int sx, int py, int px, typename U, typename E>
void fuse_convolution(add_layer<affine_, add_layer<con_<nf, nr, nc, sy, sx, py, px>, U>, E>& l)
{
if (l.layer_details().is_disabled())
return;
// get the convolution below the affine layer
auto& conv = l.subnet().layer_details();
// get the parameters from the affine layer as alias_tensor_instance
alias_tensor_instance gamma = l.layer_details().get_gamma();
alias_tensor_instance beta = l.layer_details().get_beta();
if (conv.bias_is_disabled())
{
conv.enable_bias();
}
tensor& params = conv.get_layer_params();
// update the biases
auto biases = alias_tensor(1, conv.num_filters());
biases(params, params.size() - conv.num_filters()) += mat(beta);
// guess the number of input channels
const long k_in = (params.size() - conv.num_filters()) / conv.num_filters() / conv.nr() / conv.nc();
// rescale the filters
DLIB_CASSERT(conv.num_filters() == gamma.k());
alias_tensor filter(1, k_in, conv.nr(), conv.nc());
const float* g = gamma.host();
for (long n = 0; n < conv.num_filters(); ++n)
{
filter(params, n * filter.size()) *= g[n];
}
// disable the affine layer
l.layer_details().disable();
}
template <typename input_layer_type>
void operator()(size_t , input_layer_type& ) const
{
// ignore other layers
}
template <typename T, typename U, typename E>
void operator()(size_t , add_layer<T, U, E>& l)
{
fuse_convolution(l);
}
};
}
template <typename net_type>
void fuse_layers (
net_type& net
)
{
DLIB_CASSERT(count_parameters(net) > 0, "The network has to be allocated before fusing the layers.");
visit_layers(net, impl::visitor_fuse_layers());
}
// ----------------------------------------------------------------------------------------
}
......
......@@ -1821,23 +1821,6 @@ namespace dlib
template <typename SUBNET>
using bn_fc = add_layer<bn_<FC_MODE>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <typename net_type>
void set_all_bn_running_stats_window_sizes (
const net_type& net,
unsigned long new_window_size
);
/*!
requires
- new_window_size > 0
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
ensures
- Sets the get_running_stats_window_size() field of all bn_ layers in net to
new_window_size.
!*/
// ----------------------------------------------------------------------------------------
template <typename net_type>
......@@ -3353,23 +3336,6 @@ namespace dlib
template <typename SUBNET>
using reorg = add_layer<reorg_<2, 2>, SUBNET>;
// ----------------------------------------------------------------------------------------
template <typename net_type>
void fuse_layers (
net_type& net
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- net has been properly allocated, that is: count_parameters(net) > 0.
ensures
- Disables all the affine_ layers that have a convolution as an input.
- Updates the convolution weights beneath the affine_ layers to produce the same
output as with the affine_ layers enabled.
!*/
// ----------------------------------------------------------------------------------------
}
......
......@@ -6,7 +6,6 @@
#include "loss_abstract.h"
#include "core.h"
#include "utilities.h"
#include "misc.h"
#include "../matrix.h"
#include "../cuda/tensor_tools.h"
#include "../geometry.h"
......
// Copyright (C) 2020 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNn_MISC_h
#define DLIB_DNn_MISC_h
#include "../cuda/tensor.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename label_type>
struct weighted_label
{
weighted_label()
{}
weighted_label(label_type label, float weight = 1.f)
: label(label), weight(weight)
{}
label_type label{};
float weight = 1.f;
};
// ----------------------------------------------------------------------------------------
inline double log1pexp(double x)
{
using std::exp;
using namespace std; // Do this instead of using std::log1p because some compilers
// error out otherwise (E.g. gcc 4.9 in cygwin)
if (x <= -37)
return exp(x);
else if (-37 < x && x <= 18)
return log1p(exp(x));
else if (18 < x && x <= 33.3)
return x + exp(-x);
else
return x;
}
// ----------------------------------------------------------------------------------------
template <typename T>
T safe_log(T input, T epsilon = 1e-10)
{
// Prevent trying to calculate the logarithm of a very small number (let alone zero)
return std::log(std::max(input, epsilon));
}
// ----------------------------------------------------------------------------------------
static size_t tensor_index(
const tensor& t,
const long sample,
const long k,
const long r,
const long c
)
{
return ((sample * t.k() + k) * t.nr() + r) * t.nc() + c;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_MISC_h
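As a quick illustration of the row-major layout tensor_index() computes (the tensor dimensions below are made up):

// for a tensor with t.k() == 3, t.nr() == 4, t.nc() == 5, the element at
// sample 1, channel 2, row 3, column 4 lives at linear offset
//   ((1*3 + 2)*4 + 3)*5 + 4 = 119
// i.e. samples are the slowest-varying index and columns the fastest.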
......@@ -3,7 +3,7 @@
#ifndef DLIB_DNn_UTILITIES_H_
#define DLIB_DNn_UTILITIES_H_
#include "core.h"
#include "../cuda/tensor.h"
#include "utilities_abstract.h"
#include "../geometry.h"
#include <fstream>
......@@ -31,306 +31,61 @@ namespace dlib
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_net_to_xml
{
public:
visitor_net_to_xml(std::ostream& out_) : out(out_) {}
template<typename input_layer_type>
void operator()(size_t idx, const input_layer_type& l)
{
out << "<layer idx='"<<idx<<"' type='input'>\n";
to_xml(l,out);
out << "</layer>\n";
}
template <typename T, typename U>
void operator()(size_t idx, const add_loss_layer<T,U>& l)
{
out << "<layer idx='"<<idx<<"' type='loss'>\n";
to_xml(l.loss_details(),out);
out << "</layer>\n";
}
template <typename T, typename U, typename E>
void operator()(size_t idx, const add_layer<T,U,E>& l)
{
out << "<layer idx='"<<idx<<"' type='comp'>\n";
to_xml(l.layer_details(),out);
out << "</layer>\n";
}
template <unsigned long ID, typename U, typename E>
void operator()(size_t idx, const add_tag_layer<ID,U,E>& /*l*/)
{
out << "<layer idx='"<<idx<<"' type='tag' id='"<<ID<<"'/>\n";
}
template <template<typename> class T, typename U>
void operator()(size_t idx, const add_skip_layer<T,U>& /*l*/)
{
out << "<layer idx='"<<idx<<"' type='skip' id='"<<(tag_id<T>::id)<<"'/>\n";
}
private:
std::ostream& out;
};
}
template <typename net_type>
void net_to_xml (
const net_type& net,
std::ostream& out
)
{
auto old_precision = out.precision(9);
out << "<net>\n";
visit_layers(net, impl::visitor_net_to_xml(out));
out << "</net>\n";
// restore the original stream precision.
out.precision(old_precision);
}
template <typename net_type>
void net_to_xml (
const net_type& net,
const std::string& filename
)
{
std::ofstream fout(filename);
net_to_xml(net, fout);
}
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_net_map_input_to_output
{
public:
visitor_net_map_input_to_output(dpoint& p_) : p(p_) {}
dpoint& p;
template<typename input_layer_type>
void operator()(const input_layer_type& )
{
}
template <typename T, typename U>
void operator()(const add_loss_layer<T,U>& net)
{
(*this)(net.subnet());
}
template <typename T, typename U, typename E>
void operator()(const add_layer<T,U,E>& net)
{
(*this)(net.subnet());
p = net.layer_details().map_input_to_output(p);
}
template <bool B, typename T, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_layer<T,U,E>,B>& net)
{
(*this)(net.subnet());
p = net.layer_details().map_input_to_output(p);
}
template <size_t N, template <typename> class R, typename U>
void operator()(const repeat<N, R, U>& net)
{
(*this)(net.subnet());
for (size_t i = 0; i < N; ++i)
{
(*this)(net.get_repeated_layer(N-1-i).subnet());
}
}
template <unsigned long ID, typename U, typename E>
void operator()(const add_tag_layer<ID,U,E>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <bool is_first, unsigned long ID, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_tag_layer<ID,U,E>,is_first>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <template<typename> class TAG_TYPE, typename U>
void operator()(const add_skip_layer<TAG_TYPE,U>& net)
{
(*this)(layer<TAG_TYPE>(net));
}
template <bool is_first, template<typename> class TAG_TYPE, typename SUBNET>
void operator()(const dimpl::subnet_wrapper<add_skip_layer<TAG_TYPE,SUBNET>,is_first>& net)
{
// skip layers are an identity transform, so do nothing
(*this)(layer<TAG_TYPE>(net));
}
};
class visitor_net_map_output_to_input
{
public:
visitor_net_map_output_to_input(dpoint& p_) : p(p_) {}
dpoint& p;
template<typename input_layer_type>
void operator()(const input_layer_type& )
{
}
template <typename T, typename U>
void operator()(const add_loss_layer<T,U>& net)
{
(*this)(net.subnet());
}
template <typename T, typename U, typename E>
void operator()(const add_layer<T,U,E>& net)
{
p = net.layer_details().map_output_to_input(p);
(*this)(net.subnet());
}
template <bool B, typename T, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_layer<T,U,E>,B>& net)
{
p = net.layer_details().map_output_to_input(p);
(*this)(net.subnet());
}
template <size_t N, template <typename> class R, typename U>
void operator()(const repeat<N, R, U>& net)
{
for (size_t i = 0; i < N; ++i)
{
(*this)(net.get_repeated_layer(i).subnet());
}
(*this)(net.subnet());
}
template <unsigned long ID, typename U, typename E>
void operator()(const add_tag_layer<ID,U,E>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <bool is_first, unsigned long ID, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_tag_layer<ID,U,E>,is_first>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <template<typename> class TAG_TYPE, typename U>
void operator()(const add_skip_layer<TAG_TYPE,U>& net)
{
(*this)(layer<TAG_TYPE>(net));
}
template <bool is_first, template<typename> class TAG_TYPE, typename SUBNET>
void operator()(const dimpl::subnet_wrapper<add_skip_layer<TAG_TYPE,SUBNET>,is_first>& net)
{
// skip layers are an identity transform, so do nothing
(*this)(layer<TAG_TYPE>(net));
}
};
}
template <typename net_type>
inline dpoint input_tensor_to_output_tensor(
const net_type& net,
dpoint p
)
{
impl::visitor_net_map_input_to_output temp(p);
temp(net);
return p;
}
template <typename net_type>
inline dpoint output_tensor_to_input_tensor(
const net_type& net,
dpoint p
)
{
impl::visitor_net_map_output_to_input temp(p);
temp(net);
return p;
}
// ----------------------------------------------------------------------------------------
template <typename net_type>
size_t count_parameters(
const net_type& net
)
{
size_t num_parameters = 0;
visit_layer_parameters(net, [&](const tensor& t) { num_parameters += t.size(); });
return num_parameters;
}
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_learning_rate_multiplier
{
public:
visitor_learning_rate_multiplier(double new_learning_rate_multiplier_) :
new_learning_rate_multiplier(new_learning_rate_multiplier_) {}
template <typename layer>
void operator()(layer& l) const
{
set_learning_rate_multiplier(l, new_learning_rate_multiplier);
}
private:
double new_learning_rate_multiplier;
};
}
template <typename net_type>
void set_all_learning_rate_multipliers(
net_type& net,
double learning_rate_multiplier
)
{
DLIB_CASSERT(learning_rate_multiplier >= 0);
impl::visitor_learning_rate_multiplier temp(learning_rate_multiplier);
visit_computational_layers(net, temp);
}
template <size_t begin, size_t end, typename net_type>
void set_learning_rate_multipliers_range(
net_type& net,
double learning_rate_multiplier
)
{
static_assert(begin <= end, "Invalid range");
static_assert(end <= net_type::num_layers, "Invalid range");
DLIB_CASSERT(learning_rate_multiplier >= 0);
impl::visitor_learning_rate_multiplier temp(learning_rate_multiplier);
visit_computational_layers_range<begin, end>(net, temp);
}
// ----------------------------------------------------------------------------------------
template <typename label_type>
struct weighted_label
{
weighted_label()
{}
weighted_label(label_type label, float weight = 1.f)
: label(label), weight(weight)
{}
label_type label{};
float weight = 1.f;
};
// ----------------------------------------------------------------------------------------
inline double log1pexp(double x)
{
using std::exp;
using namespace std; // Do this instead of using std::log1p because some compilers
// error out otherwise (E.g. gcc 4.9 in cygwin)
if (x <= -37)
return exp(x);
else if (-37 < x && x <= 18)
return log1p(exp(x));
else if (18 < x && x <= 33.3)
return x + exp(-x);
else
return x;
}
// ----------------------------------------------------------------------------------------
template <typename T>
T safe_log(T input, T epsilon = 1e-10)
{
// Prevent trying to calculate the logarithm of a very small number (let alone zero)
return std::log(std::max(input, epsilon));
}
// ----------------------------------------------------------------------------------------
static size_t tensor_index(
const tensor& t,
const long sample,
const long k,
const long r,
const long c
)
{
return ((sample * t.k() + k) * t.nr() + r) * t.nc() + c;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_UTILITIES_H_
......
......@@ -18,8 +18,6 @@ namespace dlib
ensures
- returns log(1+exp(x))
(except computes it using a numerically accurate method)
NOTE: For technical reasons, it is defined in misc.h.
!*/
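For illustration, a small sketch (not part of the library) of why the piecewise evaluation matters: for large x the naive form overflows while log1pexp() stays finite.

#include <dlib/dnn.h>
#include <cmath>
#include <iostream>
int main()
{
    // naive evaluation overflows: exp(1000) is inf, so log(1+exp(1000)) prints inf
    std::cout << std::log(1 + std::exp(1000.0)) << '\n';
    // the guarded version simply returns x once x is large enough
    std::cout << dlib::log1pexp(1000.0) << '\n';   // 1000
    // and for very negative x it returns exp(x), avoiding a useless log1p(0)
    std::cout << dlib::log1pexp(-50.0) << '\n';    // ~1.93e-22
}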
// ----------------------------------------------------------------------------------------
......@@ -41,136 +39,6 @@ namespace dlib
layer that uses params as its parameters.
!*/
// ----------------------------------------------------------------------------------------
template <typename net_type>
void net_to_xml (
const net_type& net,
std::ostream& out
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- All layers in the net must provide to_xml() functions.
ensures
- Prints the given neural network object as an XML document to the given output
stream.
!*/
template <typename net_type>
void net_to_xml (
const net_type& net,
const std::string& filename
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- All layers in the net must provide to_xml() functions.
ensures
- This function is just like the above net_to_xml(), except it writes to a file
rather than an ostream.
!*/
// ----------------------------------------------------------------------------------------
template <typename net_type>
dpoint input_tensor_to_output_tensor(
const net_type& net,
dpoint p
);
/*!
requires
- net_type is an object of type add_layer, add_skip_layer, or add_tag_layer.
- All layers in the net must provide map_input_to_output() functions.
ensures
- Given a dpoint (i.e. a row,column coordinate) in the input tensor given to
net, this function returns the corresponding dpoint in the output tensor
net.get_output(). This kind of mapping is useful when working with fully
convolutional networks as you will often want to know what parts of the
output feature maps correspond to what parts of the input.
- If the network contains skip layers then any layers skipped over by the skip
layer are ignored for the purpose of computing this coordinate mapping. That
is, if you walk the network from the output layer to the input layer, where
each time you encounter a skip layer you jump to the layer indicated by the
skip layer, you will visit exactly the layers in the network involved in the
input_tensor_to_output_tensor() calculation. This behavior is useful since it
allows you to compute some auxiliary DNN as a separate branch of computation
that is separate from the main network's job of running some kind of fully
convolutional network over an image. For instance, you might want to have a
branch in your network that computes some global image level
summarization/feature.
!*/
// ----------------------------------------------------------------------------------------
template <typename net_type>
dpoint output_tensor_to_input_tensor(
const net_type& net,
dpoint p
);
/*!
requires
- net_type is an object of type add_layer, add_skip_layer, or add_tag_layer.
- All layers in the net must provide map_output_to_input() functions.
ensures
- This function provides the reverse mapping of input_tensor_to_output_tensor().
That is, given a dpoint in net.get_output(), what is the corresponding dpoint
in the input tensor?
!*/
// ----------------------------------------------------------------------------------------
template <typename net_type>
inline size_t count_parameters(
const net_type& net
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
ensures
- Returns the number of allocated parameters in the network. E.g. if the network has not
been trained then, since nothing has been allocated yet, it will return 0.
!*/
// ----------------------------------------------------------------------------------------
template<typename net_type>
void set_all_learning_rate_multipliers(
net_type& net,
double learning_rate_multiplier
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- learning_rate_multiplier >= 0
ensures
- Sets all learning_rate_multipliers and bias_learning_rate_multipliers in net
to learning_rate_multiplier.
!*/
// ----------------------------------------------------------------------------------------
template <size_t begin, size_t end, typename net_type>
void set_learning_rate_multipliers_range(
net_type& net,
double learning_rate_multiplier
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- learning_rate_multiplier >= 0
- begin <= end <= net_type::num_layers
ensures
- Loops over the layers in the range [begin,end) in net and calls
set_learning_rate_multiplier on them with the value of
learning_rate_multiplier.
!*/
// ----------------------------------------------------------------------------------------
}
......
......@@ -9,6 +9,408 @@
namespace dlib
{
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_net_map_input_to_output
{
public:
visitor_net_map_input_to_output(dpoint& p_) : p(p_) {}
dpoint& p;
template<typename input_layer_type>
void operator()(const input_layer_type& )
{
}
template <typename T, typename U>
void operator()(const add_loss_layer<T,U>& net)
{
(*this)(net.subnet());
}
template <typename T, typename U, typename E>
void operator()(const add_layer<T,U,E>& net)
{
(*this)(net.subnet());
p = net.layer_details().map_input_to_output(p);
}
template <bool B, typename T, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_layer<T,U,E>,B>& net)
{
(*this)(net.subnet());
p = net.layer_details().map_input_to_output(p);
}
template <size_t N, template <typename> class R, typename U>
void operator()(const repeat<N, R, U>& net)
{
(*this)(net.subnet());
for (size_t i = 0; i < N; ++i)
{
(*this)(net.get_repeated_layer(N-1-i).subnet());
}
}
template <unsigned long ID, typename U, typename E>
void operator()(const add_tag_layer<ID,U,E>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <bool is_first, unsigned long ID, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_tag_layer<ID,U,E>,is_first>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <template<typename> class TAG_TYPE, typename U>
void operator()(const add_skip_layer<TAG_TYPE,U>& net)
{
(*this)(layer<TAG_TYPE>(net));
}
template <bool is_first, template<typename> class TAG_TYPE, typename SUBNET>
void operator()(const dimpl::subnet_wrapper<add_skip_layer<TAG_TYPE,SUBNET>,is_first>& net)
{
// skip layers are an identity transform, so do nothing
(*this)(layer<TAG_TYPE>(net));
}
};
class visitor_net_map_output_to_input
{
public:
visitor_net_map_output_to_input(dpoint& p_) : p(p_) {}
dpoint& p;
template<typename input_layer_type>
void operator()(const input_layer_type& )
{
}
template <typename T, typename U>
void operator()(const add_loss_layer<T,U>& net)
{
(*this)(net.subnet());
}
template <typename T, typename U, typename E>
void operator()(const add_layer<T,U,E>& net)
{
p = net.layer_details().map_output_to_input(p);
(*this)(net.subnet());
}
template <bool B, typename T, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_layer<T,U,E>,B>& net)
{
p = net.layer_details().map_output_to_input(p);
(*this)(net.subnet());
}
template <size_t N, template <typename> class R, typename U>
void operator()(const repeat<N, R, U>& net)
{
for (size_t i = 0; i < N; ++i)
{
(*this)(net.get_repeated_layer(i).subnet());
}
(*this)(net.subnet());
}
template <unsigned long ID, typename U, typename E>
void operator()(const add_tag_layer<ID,U,E>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <bool is_first, unsigned long ID, typename U, typename E>
void operator()(const dimpl::subnet_wrapper<add_tag_layer<ID,U,E>,is_first>& net)
{
// tag layers are an identity transform, so do nothing
(*this)(net.subnet());
}
template <template<typename> class TAG_TYPE, typename U>
void operator()(const add_skip_layer<TAG_TYPE,U>& net)
{
(*this)(layer<TAG_TYPE>(net));
}
template <bool is_first, template<typename> class TAG_TYPE, typename SUBNET>
void operator()(const dimpl::subnet_wrapper<add_skip_layer<TAG_TYPE,SUBNET>,is_first>& net)
{
// skip layers are an identity transform, so do nothing
(*this)(layer<TAG_TYPE>(net));
}
};
}
template <typename net_type>
inline dpoint input_tensor_to_output_tensor(
const net_type& net,
dpoint p
)
{
impl::visitor_net_map_input_to_output temp(p);
temp(net);
return p;
}
template <typename net_type>
inline dpoint output_tensor_to_input_tensor(
const net_type& net,
dpoint p
)
{
impl::visitor_net_map_output_to_input temp(p);
temp(net);
return p;
}
// ----------------------------------------------------------------------------------------
template <typename net_type>
size_t count_parameters(
const net_type& net
)
{
size_t num_parameters = 0;
visit_layer_parameters(net, [&](const tensor& t) { num_parameters += t.size(); });
return num_parameters;
}
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_learning_rate_multiplier
{
public:
visitor_learning_rate_multiplier(double new_learning_rate_multiplier_) :
new_learning_rate_multiplier(new_learning_rate_multiplier_) {}
template <typename layer>
void operator()(layer& l) const
{
set_learning_rate_multiplier(l, new_learning_rate_multiplier);
}
private:
double new_learning_rate_multiplier;
};
}
template <typename net_type>
void set_all_learning_rate_multipliers(
net_type& net,
double learning_rate_multiplier
)
{
DLIB_CASSERT(learning_rate_multiplier >= 0);
impl::visitor_learning_rate_multiplier temp(learning_rate_multiplier);
visit_computational_layers(net, temp);
}
template <size_t begin, size_t end, typename net_type>
void set_learning_rate_multipliers_range(
net_type& net,
double learning_rate_multiplier
)
{
static_assert(begin <= end, "Invalid range");
static_assert(end <= net_type::num_layers, "Invalid range");
DLIB_CASSERT(learning_rate_multiplier >= 0);
impl::visitor_learning_rate_multiplier temp(learning_rate_multiplier);
visit_computational_layers_range<begin, end>(net, temp);
}
// ----------------------------------------------------------------------------------------
template <typename net_type>
void set_all_bn_running_stats_window_sizes (
net_type& net,
unsigned long new_window_size
)
{
visit_layers(net, impl::visitor_bn_running_stats_window_size(new_window_size));
}
template <typename net_type>
void disable_duplicative_biases (
net_type& net
)
{
visit_layers(net, impl::visitor_disable_input_bias());
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_fuse_layers
{
public:
template <typename T>
void fuse_convolution(T&) const
{
// disable other layer types
}
// handle the standard case (convolutional layer followed by affine)
template <long nf, long nr, long nc, int sy, int sx, int py, int px, typename U, typename E>
void fuse_convolution(add_layer<affine_, add_layer<con_<nf, nr, nc, sy, sx, py, px>, U>, E>& l)
{
if (l.layer_details().is_disabled())
return;
// get the convolution below the affine layer
auto& conv = l.subnet().layer_details();
// get the parameters from the affine layer as alias_tensor_instance
alias_tensor_instance gamma = l.layer_details().get_gamma();
alias_tensor_instance beta = l.layer_details().get_beta();
if (conv.bias_is_disabled())
{
conv.enable_bias();
}
tensor& params = conv.get_layer_params();
// update the biases
auto biases = alias_tensor(1, conv.num_filters());
biases(params, params.size() - conv.num_filters()) += mat(beta);
// guess the number of input channels
const long k_in = (params.size() - conv.num_filters()) / conv.num_filters() / conv.nr() / conv.nc();
// rescale the filters
DLIB_CASSERT(conv.num_filters() == gamma.k());
alias_tensor filter(1, k_in, conv.nr(), conv.nc());
const float* g = gamma.host();
for (long n = 0; n < conv.num_filters(); ++n)
{
filter(params, n * filter.size()) *= g[n];
}
// disable the affine layer
l.layer_details().disable();
}
template <typename input_layer_type>
void operator()(size_t , input_layer_type& ) const
{
// ignore other layers
}
template <typename T, typename U, typename E>
void operator()(size_t , add_layer<T, U, E>& l)
{
fuse_convolution(l);
}
};
}
template <typename net_type>
void fuse_layers (
net_type& net
)
{
DLIB_CASSERT(count_parameters(net) > 0, "The network has to be allocated before fusing the layers.");
visit_layers(net, impl::visitor_fuse_layers());
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_net_to_xml
{
public:
visitor_net_to_xml(std::ostream& out_) : out(out_) {}
template<typename input_layer_type>
void operator()(size_t idx, const input_layer_type& l)
{
out << "<layer idx='"<<idx<<"' type='input'>\n";
to_xml(l,out);
out << "</layer>\n";
}
template <typename T, typename U>
void operator()(size_t idx, const add_loss_layer<T,U>& l)
{
out << "<layer idx='"<<idx<<"' type='loss'>\n";
to_xml(l.loss_details(),out);
out << "</layer>\n";
}
template <typename T, typename U, typename E>
void operator()(size_t idx, const add_layer<T,U,E>& l)
{
out << "<layer idx='"<<idx<<"' type='comp'>\n";
to_xml(l.layer_details(),out);
out << "</layer>\n";
}
template <unsigned long ID, typename U, typename E>
void operator()(size_t idx, const add_tag_layer<ID,U,E>& /*l*/)
{
out << "<layer idx='"<<idx<<"' type='tag' id='"<<ID<<"'/>\n";
}
template <template<typename> class T, typename U>
void operator()(size_t idx, const add_skip_layer<T,U>& /*l*/)
{
out << "<layer idx='"<<idx<<"' type='skip' id='"<<(tag_id<T>::id)<<"'/>\n";
}
private:
std::ostream& out;
};
}
template <typename net_type>
void net_to_xml (
const net_type& net,
std::ostream& out
)
{
auto old_precision = out.precision(9);
out << "<net>\n";
visit_layers(net, impl::visitor_net_to_xml(out));
out << "</net>\n";
// restore the original stream precision.
out.precision(old_precision);
}
template <typename net_type>
void net_to_xml (
const net_type& net,
const std::string& filename
)
{
std::ofstream fout(filename);
net_to_xml(net, fout);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
namespace impl
{
class visitor_net_to_dot
......
......@@ -10,6 +10,170 @@
namespace dlib
{
// ----------------------------------------------------------------------------------------
template <typename net_type>
void set_all_bn_running_stats_window_sizes (
const net_type& net,
unsigned long new_window_size
);
/*!
requires
- new_window_size > 0
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
ensures
- Sets the get_running_stats_window_size() field of all bn_ layers in net to
new_window_size.
!*/
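A minimal usage sketch; the network definition and window size below are illustrative only:

#include <dlib/dnn.h>
using namespace dlib;
// a small training-time network that contains bn_ layers
using train_net = loss_multiclass_log<fc<10, relu<bn_con<con<16,5,5,2,2, input<matrix<unsigned char>>>>>>>;
int main()
{
    train_net net;
    // make every bn_ layer average its running statistics over the last 1000 mini-batches
    set_all_bn_running_stats_window_sizes(net, 1000);
}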
// ----------------------------------------------------------------------------------------
template <typename net_type>
void fuse_layers (
net_type& net
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- net has been properly allocated, that is: count_parameters(net) > 0.
ensures
- Disables all the affine_ layers that have a convolution as an input.
- Updates the convolution weights beneath the affine_ layers to produce the same
output as with the affine_ layers enabled.
!*/
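A sketch of the intended inference-time workflow; the network definition and the weights file name are hypothetical:

#include <dlib/dnn.h>
using namespace dlib;
// inference-time version of a network: affine layers stand in for bn_con
using infer_net = loss_multiclass_log<fc<10, relu<affine<con<16,5,5,2,2, input<matrix<unsigned char>>>>>>>;
int main()
{
    infer_net net;
    deserialize("trained_net.dat") >> net;   // hypothetical file holding trained weights
    // fold each affine layer into the convolution feeding it; the network now
    // produces the same outputs with one less layer to execute per conv block
    fuse_layers(net);
}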
// ----------------------------------------------------------------------------------------
template <typename net_type>
void net_to_xml (
const net_type& net,
std::ostream& out
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- All layers in the net must provide to_xml() functions.
ensures
- Prints the given neural network object as an XML document to the given output
stream.
!*/
template <typename net_type>
void net_to_xml (
const net_type& net,
const std::string& filename
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- All layers in the net must provide to_xml() functions.
ensures
- This function is just like the above net_to_xml(), except it writes to a file
rather than an ostream.
!*/
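For example, dumping the same architecture to the console and to a file (the output file name is arbitrary):

#include <dlib/dnn.h>
#include <iostream>
using namespace dlib;
using net_type = loss_multiclass_log<fc<10, relu<con<6,5,5,1,1, input<matrix<unsigned char>>>>>>;
int main()
{
    net_type net;
    net_to_xml(net, std::cout);   // dump the network description to the console
    net_to_xml(net, "net.xml");   // or write the same XML document to a file
}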
// ----------------------------------------------------------------------------------------
template <typename net_type>
dpoint input_tensor_to_output_tensor(
const net_type& net,
dpoint p
);
/*!
requires
- net_type is an object of type add_layer, add_skip_layer, or add_tag_layer.
- All layers in the net must provide map_input_to_output() functions.
ensures
- Given a dpoint (i.e. a row,column coordinate) in the input tensor given to
net, this function returns the corresponding dpoint in the output tensor
net.get_output(). This kind of mapping is useful when working with fully
convolutional networks as you will often want to know what parts of the
output feature maps correspond to what parts of the input.
- If the network contains skip layers then any layers skipped over by the skip
layer are ignored for the purpose of computing this coordinate mapping. That
is, if you walk the network from the output layer to the input layer, where
each time you encounter a skip layer you jump to the layer indicated by the
skip layer, you will visit exactly the layers in the network involved in the
input_tensor_to_output_tensor() calculation. This behavior is useful since it
allows you to compute some auxiliary DNN as a separate branch of computation
that is separate from the main network's job of running some kind of fully
convolutional network over an image. For instance, you might want to have a
branch in your network that computes some global image level
summarization/feature.
!*/
// ----------------------------------------------------------------------------------------
template <typename net_type>
dpoint output_tensor_to_input_tensor(
const net_type& net,
dpoint p
);
/*!
requires
- net_type is an object of type add_layer, add_skip_layer, or add_tag_layer.
- All layers in the net must provide map_output_to_input() functions.
ensures
- This function provides the reverse mapping of input_tensor_to_output_tensor().
That is, given a dpoint in net.get_output(), what is the corresponding dpoint
in the input tensor?
!*/
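As a rough sketch of both mappings on a fully convolutional stack (the architecture and coordinates are made up for illustration):

#include <dlib/dnn.h>
#include <iostream>
using namespace dlib;
// a loss-free, fully convolutional net: two stride-2 convolutions
using fcn_type = relu<con<32,3,3,2,2, relu<con<16,3,3,2,2, input<matrix<rgb_pixel>>>>>>;
int main()
{
    fcn_type net;
    const dpoint in_pt(64, 64);
    // where does this input location land in the output feature map?
    const dpoint out_pt = input_tensor_to_output_tensor(net, in_pt);
    // and which input location does that output cell correspond to?
    const dpoint back_pt = output_tensor_to_input_tensor(net, out_pt);
    std::cout << in_pt << " -> " << out_pt << " -> " << back_pt << std::endl;
}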
// ----------------------------------------------------------------------------------------
template <typename net_type>
inline size_t count_parameters(
const net_type& net
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
ensures
- Returns the number of allocated parameters in the network. E.g. if the network has not
been trained then, since nothing has been allocated yet, it will return 0.
!*/
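A small sketch; the 28x28 input is arbitrary and is only there to force parameter allocation via a first forward pass:

#include <dlib/dnn.h>
#include <iostream>
using namespace dlib;
using net_type = loss_multiclass_log<fc<10, relu<con<6,5,5,1,1, input<matrix<unsigned char>>>>>>;
int main()
{
    net_type net;
    std::cout << count_parameters(net) << '\n';  // 0: nothing has been allocated yet
    matrix<unsigned char> img = zeros_matrix<unsigned char>(28, 28);
    net(img);                                    // the first forward pass allocates the parameters
    std::cout << count_parameters(net) << '\n';  // now a non-zero count
}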
// ----------------------------------------------------------------------------------------
template<typename net_type>
void set_all_learning_rate_multipliers(
net_type& net,
double learning_rate_multiplier
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- learning_rate_multiplier >= 0
ensures
- Sets all learning_rate_multipliers and bias_learning_rate_multipliers in net
to learning_rate_multiplier.
!*/
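For instance, a sketch of freezing an entire pretrained network before attaching and training new layers (the network here is illustrative):

#include <dlib/dnn.h>
using namespace dlib;
using net_type = loss_multiclass_log<fc<10, relu<con<6,5,5,1,1, input<matrix<unsigned char>>>>>>;
int main()
{
    net_type net;
    // a multiplier of 0 means the solver will leave those parameters untouched
    set_all_learning_rate_multipliers(net, 0);
}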
// ----------------------------------------------------------------------------------------
template <size_t begin, size_t end, typename net_type>
void set_learning_rate_multipliers_range(
net_type& net,
double learning_rate_multiplier
);
/*!
requires
- net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
add_tag_layer.
- learning_rate_multiplier >= 0
- begin <= end <= net_type::num_layers
ensures
- Loops over the layers in the range [begin,end) in net and calls
set_learning_rate_multiplier on them with the value of
learning_rate_multiplier.
!*/
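Combining both calls gives a common fine-tuning pattern; recall that dlib numbers layers from the output side, so index 0 is the layer closest to the loss (a sketch, with an illustrative network):

#include <dlib/dnn.h>
using namespace dlib;
using net_type = loss_multiclass_log<fc<10, relu<con<6,5,5,1,1, input<matrix<unsigned char>>>>>>;
int main()
{
    net_type net;
    set_all_learning_rate_multipliers(net, 0);           // freeze everything...
    set_learning_rate_multipliers_range<0, 2>(net, 1);   // ...then re-enable the layers in [0,2), nearest the output
}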
// ----------------------------------------------------------------------------------------
template <typename net_type>
......
......@@ -34,14 +34,18 @@
<term file="dlib/algs.h.html" name="stack_based_memory_block" include="dlib/algs.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="net_to_xml" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="log1pexp" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="randomize_parameters" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="input_tensor_to_output_tensor" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="output_tensor_to_input_tensor" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="count_parameters" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="set_all_learning_rate_multipliers" include="dlib/dnn.h"/>
<term file="dlib/dnn/utilities_abstract.h.html" name="set_learning_rate_multipliers_range" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="set_all_bn_running_stats_window_sizes" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="disable_duplicative_biases" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="fuse_layers" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="net_to_xml" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="net_to_dot" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="input_tensor_to_output_tensor" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="output_tensor_to_input_tensor" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="count_parameters" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="set_all_learning_rate_multipliers" include="dlib/dnn.h"/>
<term file="dlib/dnn/visitors_abstract.h.html" name="set_learning_rate_multipliers_range" include="dlib/dnn.h"/>
<term file="dlib/dnn/core_abstract.h.html" name="tuple_head" include="dlib/dnn.h"/>
<term file="dlib/dnn/core_abstract.h.html" name="tuple_tail" include="dlib/dnn.h"/>
<term file="dlib/dnn/core_abstract.h.html" name="get_learning_rate_multiplier" include="dlib/dnn.h"/>
......@@ -103,9 +107,6 @@
<term file="dlib/dnn/layers_abstract.h.html" name="layer_mode" include="dlib/dnn.h"/>
<term file="dlib/dnn/layers_abstract.h.html" name="CONV_MODE" include="dlib/dnn.h"/>
<term file="dlib/dnn/layers_abstract.h.html" name="FC_MODE" include="dlib/dnn.h"/>
<term file="dlib/dnn/layers_abstract.h.html" name="set_all_bn_running_stats_window_sizes" include="dlib/dnn.h"/>
<term file="dlib/dnn/layers_abstract.h.html" name="disable_duplicative_biases" include="dlib/dnn.h"/>
<term file="dlib/dnn/layers_abstract.h.html" name="fuse_layers" include="dlib/dnn.h"/>
<term file="dlib/cuda/tensor_abstract.h.html" name="tensor" include="dlib/cuda/tensor.h"/>
<term file="dlib/cuda/tensor_abstract.h.html" name="resizable_tensor" include="dlib/cuda/tensor.h"/>
<term file="dlib/cuda/tensor_abstract.h.html" name="alias_tensor_instance" include="dlib/cuda/tensor.h"/>
......