Made multiply() more flexible and also fixed a bug in the CPU implementation of batch_normalize_conv.

Made multiply() more flexible and also fixed a bug in the CPU implementation of
batch_normalize_conv.
a29086bf · Davis King · 29f56b12 · a29086bf · a29086bf · a29086bf
Commit a29086bf authored Dec 08, 2015 by Davis King
Showing with 127 additions and 14 deletions

dlib/dnn/cpu_dlib.cpp dlib/dnn/cpu_dlib.cpp +33 -4

dlib/dnn/tensor_tools.cpp dlib/dnn/tensor_tools.cpp +7 -2

dlib/dnn/tensor_tools.h dlib/dnn/tensor_tools.h +17 -5

dlib/test/dnn.cpp dlib/test/dnn.cpp +70 -3

No files found.
--- a/dlib/dnn/cpu_dlib.cpp
+++ b/dlib/dnn/cpu_dlib.cpp
@@ -34,13 +34,38 @@ namespace dlib
            const tensor& src2
        )
        {
-            DLIB_CASSERT(dest.size()==src1.size(),"");
+            DLIB_CASSERT(dest.k() == src1.k() && src1.k() == src2.k() &&
-            DLIB_CASSERT(dest.size()==src2.size(),"");
+                dest.nr() == src1.nr() && src1.nr() == src2.nr() &&
+                dest.nc() == src1.nc() && src1.nc() == src2.nc() ,"");
+            const long MD = std::max(std::max(dest.num_samples(),src1.num_samples()),src2.num_samples());
+            DLIB_CASSERT((dest.num_samples()==1 || dest.num_samples()==MD) &&
+                (src1.num_samples()==1 || src1.num_samples()==MD) &&
+                (src2.num_samples()==1 || src2.num_samples()==MD) ,"");
+            if (dest.size() == 0)
+                return;
+            const size_t max_size = std::max(std::max(dest.size(),src1.size()),src2.size());
            const auto d = dest.host();
            const auto s1 = src1.host();
            const auto s2 = src2.host();
-            for (size_t i = 0; i < src1.size(); ++i)
+            if (dest.size() == src1.size() && src1.size() == src2.size())
-                d[i] = s1[i]*s2[i];
+            {
+                for (size_t i = 0; i < src1.size(); ++i)
+                    d[i] = s1[i]*s2[i];
+            }
+            else if (dest.num_samples() == 1)
+            {
+                for (size_t i = 0; i < dest.size(); ++i)
+                    d[i] = 0;
+                for (size_t i = 0; i < max_size; ++i)
+                    d[i%dest.size()] += s1[i%src1.size()]*s2[i%src2.size()];
+            }
+            else
+            {
+                for (size_t i = 0; i < max_size; ++i)
+                    d[i] = s1[i%src1.size()]*s2[i%src2.size()];
+            }
        }
    // -----------------------------------------------------------------------------------
@@ -422,6 +447,10 @@ namespace dlib
            DLIB_CASSERT(src.k() == beta_grad.size(),"");
            DLIB_CASSERT(have_same_dimensions(gradient_input, src),"");
            DLIB_CASSERT(have_same_dimensions(gradient_input, src_grad),"");
+            beta_grad = 0;
+            gamma_grad = 0;
            auto p_grad = gradient_input.host();
            auto p_src = src.host();
            const auto p_gamma = gamma.host();   

--- a/dlib/dnn/tensor_tools.cpp
+++ b/dlib/dnn/tensor_tools.cpp
@@ -116,8 +116,13 @@ namespace dlib { namespace tt
        const tensor& src2
    )
    {
-        DLIB_CASSERT(have_same_dimensions(dest,src1) == true,"");
+        DLIB_CASSERT(dest.k() == src1.k() && src1.k() == src2.k() &&
-        DLIB_CASSERT(have_same_dimensions(dest,src2) == true,"");
+            dest.nr() == src1.nr() && src1.nr() == src2.nr() &&
+            dest.nc() == src1.nc() && src1.nc() == src2.nc() ,"");
+        const long MD = std::max(std::max(dest.num_samples(),src1.num_samples()),src2.num_samples());
+        DLIB_CASSERT((dest.num_samples()==1 || dest.num_samples()==MD) &&
+                    (src1.num_samples()==1 || src1.num_samples()==MD) &&
+                    (src2.num_samples()==1 || src2.num_samples()==MD) ,"");
 #ifdef DLIB_USE_CUDA
        cuda::multiply(dest, src1, src2);
 #else

--- a/dlib/dnn/tensor_tools.h
+++ b/dlib/dnn/tensor_tools.h
@@ -92,6 +92,7 @@ namespace dlib { namespace tt
 // ----------------------------------------------------------------------------------------
+// TODO, delete this function
    void multiply (
        tensor& dest,
        const tensor& src
@@ -114,12 +115,23 @@ namespace dlib { namespace tt
    );
    /*!
        requires
-            - have_same_dimensions(dest,src1) == true
+            - dest.k()  == src1.k()  == src2.k()
-            - have_same_dimensions(dest,src2) == true
+            - dest.nr() == src1.nr() == src2.nr()
+            - dest.nc() == src1.nc() == src2.nc()
+            - dest.num_samples(), src1.num_samples(), and src2.num_samples() must each be
+              either 1 or one common value (all the ones that aren't 1 must be equal).
        ensures
-            - #dest == src1*src2
+            - let MD = max(dest.num_samples(), src1.num_samples(), src2.num_samples())
-              That is, for all valid i:
+            - This function pointwise multiplies src1 with src2 and stores the result into
-                #dest.host()[i] == src1.host()[i]*src2.host()[i]
+              #dest.  However, how the multiplication happens depends on the dimensions of
+              the tensors.  First, when src1 and src2 are multiplied together, if either
+              has a num_samples() dimension that is != MD, then it is first replicated to
+              produce a tensor with num_samples()==MD, and then the two tensors are
+              pointwise multiplied together.
+              Second, if dest.num_samples()==1, then after the pointwise multiplication of
+              src1 with src2, the result has its samples summed to produce an output tensor
+              with num_samples()==1 which is then assigned to #dest.
    !*/
 // ----------------------------------------------------------------------------------------

--- a/dlib/test/dnn.cpp
+++ b/dlib/test/dnn.cpp
@@ -380,6 +380,40 @@ namespace
        truth3 += 2;
        DLIB_TEST(mat(at(A,4)) == reshape(truth2,2,2));
        DLIB_TEST(mat(A) == join_cols(truth1,join_cols(truth2,truth3)));
+        {
+            resizable_tensor dest(3,4);
+            resizable_tensor A, B;
+            A = dest;
+            B = dest;
+            tensor_rand rnd;
+            rnd.fill_uniform(dest);
+            rnd.fill_uniform(A);
+            rnd.fill_uniform(B);
+            dest.set_size(1,4);
+            tt::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B))))) < 1e-6); 
+            A.set_size(1,4);
+            rnd.fill_uniform(A);
+            matrix<float> AA = join_cols(mat(A),mat(A)); AA = join_cols(mat(A),AA);
+            tt::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); 
+            tt::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); 
+            dest.set_size(3,4);
+            tt::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); 
+            tt::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); 
+        }
    }
 // ----------------------------------------------------------------------------------------
@@ -457,8 +491,40 @@ namespace
        cpu::threshold(src2, 0.5);
        DLIB_TEST(equal(mat(src),mat(src2)));
+        {
+            resizable_tensor dest(3,4);
+            resizable_tensor A, B;
+            A = dest;
+            B = dest;
+            tensor_rand rnd;
+            rnd.fill_uniform(dest);
+            rnd.fill_uniform(A);
+            rnd.fill_uniform(B);
+            dest.set_size(1,4);
+            cuda::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(mat(A),mat(B))))) < 1e-6); 
+            A.set_size(1,4);
+            rnd.fill_uniform(A);
+            matrix<float> AA = join_cols(mat(A),mat(A)); AA = join_cols(mat(A),AA);
+            cuda::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); 
+            cuda::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-sum_rows(pointwise_multiply(AA,mat(B))))) < 1e-6); 
+            dest.set_size(3,4);
+            cuda::multiply(dest, B, A);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); 
+            cuda::multiply(dest, A, B);
+            DLIB_TEST(max(abs(mat(dest)-pointwise_multiply(AA,mat(B)))) < 1e-6); 
+        }
    }
-#endif
 // ----------------------------------------------------------------------------------------
@@ -561,6 +627,7 @@ namespace
        DLIB_TEST(max(abs(mat(gamma_grad)-mat(gamma_grad2))) < 1e-4);
        DLIB_TEST(max(abs(mat(beta_grad)-mat(beta_grad2))) < 1e-4);
    }
+#endif
 // ----------------------------------------------------------------------------------------
@@ -642,6 +709,8 @@ namespace
            test_more_ops(4,1);
            test_more_ops(1,4);
            test_more_ops(10000,4);
+            compare_bn_gpu_and_cpu();
+            compare_bn_conv_gpu_and_cpu();
 #endif
            test_tanh();
            test_softmax();
@@ -649,8 +718,6 @@ namespace
            test_batch_normalize();
            test_batch_normalize_conv();
            test_basic_tensor_ops();
-            compare_bn_gpu_and_cpu();
-            compare_bn_conv_gpu_and_cpu();
            test_layers();
        }
    } a;