OpenDAS / dlib, commit fe6e2457
Authored Jan 24, 2016 by Davis King
Parent: fdfe77d1

Added CPU convolution implementation.
Showing 4 changed files with 227 additions and 86 deletions.
dlib/dnn/cpu_dlib.cpp       +168   -0
dlib/dnn/cpu_dlib.h          +39   -0
dlib/dnn/tensor_tools.cpp     +0  -67
dlib/dnn/tensor_tools.h      +20  -19
dlib/dnn/cpu_dlib.cpp
...
@@ -1362,6 +1362,174 @@ namespace dlib
    }
    }

    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------
    // ------------------------------------------------------------------------------------

    void img2col(
        matrix<float>& output,
        const tensor& data,
        long n,
        long filter_nr,
        long filter_nc,
        long stride_y,
        long stride_x
    )
    {
        const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
        const rectangle boundary = get_rect(data);

        const long out_nr = 1+(data.nr()-filter_nr%2)/stride_y;
        const long out_nc = 1+(data.nc()-filter_nc%2)/stride_x;

        output.set_size(out_nr*out_nc, data.k()*filter_nr*filter_nc);
        DLIB_CASSERT(output.size() != 0, "");
        float* t = &output(0,0);

        // now fill in the Toeplitz output matrix for the n-th sample in data.
        size_t cnt = 0;
        for (long r = -(1-filter_nr%2); r < data.nr(); r += stride_y)
        {
            for (long c = -(1-filter_nc%2); c < data.nc(); c += stride_x)
            {
                for (long k = 0; k < data.k(); ++k)
                {
                    for (long y = 0; y < filter_nr; ++y)
                    {
                        for (long x = 0; x < filter_nc; ++x)
                        {
                            DLIB_CASSERT(cnt < output.size(), "");
                            long xx = c-x+filter_nc/2;
                            long yy = r-y+filter_nr/2;
                            if (boundary.contains(xx,yy))
                                *t = d[(k*data.nr() + yy)*data.nc() + xx];
                            else
                                *t = 0;
                            ++t;
                            ++cnt;
                        }
                    }
                }
            }
        }
    }
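The function above builds the classic im2col (patch-to-row) matrix. The following standalone sketch is an illustration added to this page, not part of the commit: it repeats the same index arithmetic for a single-channel image with an odd filter and stride 1, using made-up names and sizes and no dlib types.

    #include <cstdio>
    #include <vector>

    int main()
    {
        const long nr = 4, nc = 4;     // image size
        const long fnr = 3, fnc = 3;   // filter size (odd, so output stays the same size)
        std::vector<float> img(nr*nc);
        for (long i = 0; i < nr*nc; ++i)
            img[i] = float(i);

        const long out_nr = 1 + (nr - fnr%2);   // == nr for an odd filter at stride 1
        const long out_nc = 1 + (nc - fnc%2);

        // One row per output pixel, one column per filter tap; out-of-bounds taps stay 0.
        std::vector<float> rows(out_nr*out_nc * fnr*fnc, 0.0f);

        long row = 0;
        for (long r = 0; r < nr; ++r)
        for (long c = 0; c < nc; ++c, ++row)
        {
            long col = 0;
            for (long y = 0; y < fnr; ++y)
            for (long x = 0; x < fnc; ++x, ++col)
            {
                // same arithmetic as img2col above; note r - y and c - x,
                // i.e. the filter is applied flipped, as in true convolution
                const long yy = r - y + fnr/2;
                const long xx = c - x + fnc/2;
                if (0 <= yy && yy < nr && 0 <= xx && xx < nc)
                    rows[row*(fnr*fnc) + col] = img[yy*nc + xx];
            }
        }

        // Each row dotted with a flattened 3x3 filter gives one output pixel,
        // which is why the whole convolution becomes a single matrix multiply.
        printf("patch matrix is %ld x %ld\n", out_nr*out_nc, fnr*fnc);
        return 0;
    }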
    void col2img(
        const matrix<float>& output,
        tensor& data,
        long n,
        long filter_nr,
        long filter_nc,
        long stride_y,
        long stride_x
    )
    {
        const auto d = data.host() + data.k()*data.nr()*data.nc()*n;
        const rectangle boundary = get_rect(data);

        DLIB_CASSERT(output.size() != 0, "");
        const float* t = &output(0,0);

        // now fill in the Toeplitz output matrix for the n-th sample in data.
        for (long r = -(1-filter_nr%2); r < data.nr(); r += stride_y)
        {
            for (long c = -(1-filter_nc%2); c < data.nc(); c += stride_x)
            {
                for (long k = 0; k < data.k(); ++k)
                {
                    for (long y = 0; y < filter_nr; ++y)
                    {
                        for (long x = 0; x < filter_nc; ++x)
                        {
                            long xx = c-x+filter_nc/2;
                            long yy = r-y+filter_nr/2;
                            if (boundary.contains(xx,yy))
                                d[(k*data.nr() + yy)*data.nc() + xx] += *t;
                            ++t;
                        }
                    }
                }
            }
        }
    }
    void tensor_conv::
    operator() (
        resizable_tensor& output,
        const tensor& data,
        const tensor& filters,
        int stride_y,
        int stride_x
    )
    {
        DLIB_CASSERT(is_same_object(output,data) == false, "");
        DLIB_CASSERT(is_same_object(output,filters) == false, "");
        DLIB_CASSERT(filters.k() == data.k(), "");
        DLIB_CASSERT(stride_y > 0 && stride_x > 0, "");

        output.set_size(data.num_samples(),
                        filters.num_samples(),
                        1+(data.nr()-filters.nr()%2)/stride_y,
                        1+(data.nc()-filters.nc()%2)/stride_x);

        matrix<float> temp;
        for (long n = 0; n < data.num_samples(); ++n)
        {
            img2col(temp, data, n, filters.nr(), filters.nc(), stride_y, stride_x);
            output.set_sample(n, mat(filters)*trans(temp));
        }

        last_stride_y = stride_y;
        last_stride_x = stride_x;
    }
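A note added for reference (not part of the commit): writing \(F\) for mat(filters), which has filters.num_samples() rows and k*filter_nr*filter_nc columns, and \(T_n\) for the img2col matrix of sample n, which has out_nr*out_nc rows and the same number of columns, the loop body computes, per sample,

\[ \text{output}_n = F\,T_n^{\mathsf{T}}, \]

a matrix with filters.num_samples() rows and out_nr*out_nc columns, which set_sample() then stores as the n-th sample of the 4-D output tensor. This is the standard conv-as-GEMM formulation: all the spatial bookkeeping lives in img2col, and the arithmetic is a single matrix multiply.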
// ------------------------------------------------------------------------------------
    void tensor_conv::
    get_gradient_for_data (
        const tensor& gradient_input,
        const tensor& filters,
        tensor& data_gradient
    )
    {
        matrix<float> temp;
        for (long n = 0; n < gradient_input.num_samples(); ++n)
        {
            auto gi = mat(gradient_input.host()+gradient_input.k()*gradient_input.nr()*gradient_input.nc()*n,
                          gradient_input.k(),
                          gradient_input.nr()*gradient_input.nc());

            temp = trans(gi)*mat(filters);
            col2img(temp, data_gradient, n, filters.nr(), filters.nc(), last_stride_y, last_stride_x);
        }
    }
// ------------------------------------------------------------------------------------
    void tensor_conv::
    get_gradient_for_filters (
        const tensor& gradient_input,
        const tensor& data,
        tensor& filters_gradient
    )
    {
        matrix<float> temp;
        for (long n = 0; n < gradient_input.num_samples(); ++n)
        {
            auto gi = mat(gradient_input.host()+gradient_input.k()*gradient_input.nr()*gradient_input.nc()*n,
                          gradient_input.k(),
                          gradient_input.nr()*gradient_input.nc());

            img2col(temp, data, n, filters_gradient.nr(), filters_gradient.nc(), last_stride_y, last_stride_x);
            if (n == 0)
                filters_gradient = gi*temp;
            else
                filters_gradient += gi*temp;
        }
    }
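Another added note, on why the two backward routines look the way they do: with the forward pass written per sample as \(Y_n = F\,T_n^{\mathsf{T}}\), where \(F\) is mat(filters) and \(T_n\) is the img2col matrix of sample n, and with \(G_n\) denoting gradient_input for sample n, the chain rule gives

\[ \frac{\partial L}{\partial F} = \sum_n G_n T_n, \qquad \frac{\partial L}{\partial T_n} = G_n^{\mathsf{T}} F. \]

The first expression is exactly the filters_gradient = / += gi*temp accumulation in get_gradient_for_filters, and the second is the temp = trans(gi)*mat(filters) product in get_gradient_for_data, which col2img then scatters back into the data_gradient image, summing contributions from overlapping filter positions.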
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
// ------------------------------------------------------------------------------------
...
dlib/dnn/cpu_dlib.h
...
@@ -286,6 +286,45 @@ namespace dlib
    };

    // -----------------------------------------------------------------------------------

    class tensor_conv
    {
    public:
        tensor_conv(const tensor_conv&) = delete;
        tensor_conv& operator=(const tensor_conv&) = delete;

        tensor_conv() {}

        void clear(
        ) {}

        void operator() (
            resizable_tensor& output,
            const tensor& data,
            const tensor& filters,
            int stride_y,
            int stride_x
        );

        void get_gradient_for_data (
            const tensor& gradient_input,
            const tensor& filters,
            tensor& data_gradient
        );

        void get_gradient_for_filters (
            const tensor& gradient_input,
            const tensor& data,
            tensor& filters_gradient
        );

    private:
        long last_stride_y;
        long last_stride_x;
    };

    // -----------------------------------------------------------------------------------

    }
}
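For orientation, here is a hypothetical usage sketch of the class declared above. It is not part of the commit; it assumes this revision of dlib is on the include path and compiled into the program, and the header names, tensor sizes, and fill loop are only illustrative.

    #include <cstddef>
    #include "dlib/dnn/tensor.h"
    #include "dlib/dnn/cpu_dlib.h"

    int main()
    {
        using namespace dlib;

        resizable_tensor data, filters, output;
        data.set_size(1, 3, 8, 8);     // 1 sample, 3 channels, 8x8 image
        filters.set_size(4, 3, 3, 3);  // 4 filters, 3 channels, 3x3 each

        // Fill with something deterministic just so there is data to convolve.
        for (size_t i = 0; i < data.size(); ++i)    data.host()[i]    = 0.01f*i;
        for (size_t i = 0; i < filters.size(); ++i) filters.host()[i] = 0.1f;

        cpu::tensor_conv conv;
        conv(output, data, filters, 1, 1);   // stride_y = stride_x = 1

        // Per the spec in tensor_tools.h below, output is
        // 1 x 4 x (1+(8-3%2)/1) x (1+(8-3%2)/1) = 1 x 4 x 8 x 8.
        return 0;
    }

In normal use one would go through tt::tensor_conv from tensor_tools.h, which, as the header changes below show, forwards to this CPU class when DLIB_USE_CUDA is not defined.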
...
dlib/dnn/tensor_tools.cpp
...
@@ -409,73 +409,6 @@ namespace dlib { namespace tt
#endif
    }

// ----------------------------------------------------------------------------------------

    tensor_conv::
    tensor_conv()
    {
    }

    void tensor_conv::
    clear(
    )
    {
#ifdef DLIB_USE_CUDA
        impl.clear();
#else
        // TODO
        DLIB_CASSERT(false, "");
#endif
    }

    void tensor_conv::
    operator() (
        resizable_tensor& output,
        const tensor& data,
        const tensor& filters,
        int stride_y,
        int stride_x
    )
    {
#ifdef DLIB_USE_CUDA
        impl(output, data, filters, stride_y, stride_x);
#else
        // TODO
        DLIB_CASSERT(false, "");
#endif
    }

    void tensor_conv::
    get_gradient_for_data (
        const tensor& gradient_input,
        const tensor& filters,
        tensor& data_gradient
    )
    {
#ifdef DLIB_USE_CUDA
        impl.get_gradient_for_data(gradient_input, filters, data_gradient);
#else
        // TODO
        DLIB_CASSERT(false, "");
#endif
    }

    void tensor_conv::
    get_gradient_for_filters (
        const tensor& gradient_input,
        const tensor& data,
        tensor& filters_gradient
    )
    {
#ifdef DLIB_USE_CUDA
        impl.get_gradient_for_filters(gradient_input, data, filters_gradient);
#else
        // TODO
        DLIB_CASSERT(false, "");
#endif
    }

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
...
dlib/dnn/tensor_tools.h
...
@@ -563,10 +563,10 @@ namespace dlib { namespace tt
         tensor_conv(const tensor_conv&) = delete;
         tensor_conv& operator=(const tensor_conv&) = delete;

-        tensor_conv();
+        tensor_conv() {}

         void clear(
-        );
+        ) { impl.clear(); }

         void operator() (
             resizable_tensor& output,
...
@@ -574,38 +574,39 @@ namespace dlib { namespace tt
             const tensor& filters,
             int stride_y,
             int stride_x
-        );
+        ) { impl(output, data, filters, stride_y, stride_x); }
         /*!
             requires
                 - stride_y > 0
                 - stride_x > 0
                 - is_same_object(output,data) == false
                 - is_same_object(output,filters) == false
+                - filters.k() == data.k()
             ensures
                 - convolves filters over data.
                 - filters contains filters.num_samples() filters.
                 - #output.num_samples() == data.num_samples()
                 - #output.k() == filters.num_samples()
                 - #output.nr() == 1+(data.nr()-filters.nr()%2)/stride_y
                 - #output.nc() == 1+(data.nc()-filters.nc()%2)/stride_x
         !*/

         void get_gradient_for_data (
             const tensor& gradient_input,
             const tensor& filters,
             tensor& data_gradient
-        );
+        ) { impl.get_gradient_for_data(gradient_input, filters, data_gradient); }
         /*!
             requires
-                - filters has the same dimensions as the filters object give to the last
+                - filters has the same dimensions as the filters object given to the last
                   call to operator().
-                - data_gradient has the same dimensions as the data object give to the last
+                - data_gradient has the same dimensions as the data object given to the last
                   call to operator().
-                - gradient_input has the same dimensions as the output of operator().
+                - gradient_input has the same dimensions as the last output of operator().
                 - is_same_object(data_gradient,filters) == false
                 - is_same_object(data_gradient,gradient_input) == false
             ensures
-                - let OUT be the output of (*this)(OUT,data,filters).
+                - let OUT be the output of (*this)(OUT,data,filters,sx,sy).
                 - let f(data,filters) == dot(OUT, gradient_input)
                 - This function finds the gradient of f() with respect to data and adds
                   this gradient to data_gradient.
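As a quick numeric check of the #output.nr() and #output.nc() formulas in the spec above (this example is an addition to this page, not commit text): with data.nr() == 28, filters.nr() == 5, and stride_y == 2, the formula gives 1 + (28 - 5%2)/2 == 1 + 27/2 == 14 under integer division, while stride_y == 1 gives 1 + 27/1 == 28, i.e. an odd-sized filter at stride 1 yields a same-sized, zero-padded output.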
...
@@ -615,18 +616,18 @@ namespace dlib { namespace tt
             const tensor& gradient_input,
             const tensor& data,
             tensor& filters_gradient
-        );
+        ) { impl.get_gradient_for_filters(gradient_input, data, filters_gradient); }
         /*!
             requires
-                - filters_gradient has the same dimensions as the filters object give to
+                - filters_gradient has the same dimensions as the filters object given to
                   the last call to operator().
-                - data has the same dimensions as the data object give to the last call to
+                - data has the same dimensions as the data object given to the last call to
                   operator().
-                - gradient_input has the same dimensions as the output of operator().
+                - gradient_input has the same dimensions as the last output of operator().
                 - is_same_object(filters_gradient,data) == false
                 - is_same_object(filters_gradient,gradient_input) == false
             ensures
-                - let OUT be the output of (*this)(OUT,data,filters).
+                - let OUT be the output of (*this)(OUT,data,filters,sx,sy).
                 - let f(data,filters) == dot(OUT, gradient_input)
                 - This function finds the gradient of f() with respect to filters and assigns
                   this gradient to filters_gradient.
...
@@ -636,7 +637,7 @@ namespace dlib { namespace tt
 #ifdef DLIB_USE_CUDA
         cuda::tensor_conv impl;
 #else
-        // TODO
+        cpu::tensor_conv impl;
 #endif
     };
...