OpenDAS / dlib · Commit ebdc064c

Commit ebdc064c, authored Jul 07, 2017 by Davis King. Merge commit; parents 917dcad3 and 0ed1ce61.

4 changed files with 337 additions and 7 deletions (+337 −7):

- dlib/dnn/loss.h: +145 −3
- dlib/dnn/loss_abstract.h: +88 −0
- dlib/test/dnn.cpp: +100 −4
- setup.py: +4 −0
dlib/dnn/loss.h

@@ -1549,11 +1549,11 @@ namespace dlib
             typename SUB_TYPE,
             typename label_iterator
             >
-        void to_label (
+        static void to_label (
             const tensor& input_tensor,
             const SUB_TYPE& sub,
             label_iterator iter
-        ) const
+        )
         {
             DLIB_CASSERT(sub.sample_expansion_factor() == 1);

@@ -1678,7 +1678,7 @@ namespace dlib
             std::string version;
             deserialize(version, in);
             if (version != "loss_multiclass_log_per_pixel_")
-                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_.");
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_.");
         }

         friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_&)

@@ -1704,6 +1704,148 @@ namespace dlib
     template <typename SUBNET>
     using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_per_pixel_weighted_
+    {
+    public:
+
+        struct weighted_label
+        {
+            weighted_label()
+            {}
+
+            weighted_label(uint16_t label, float weight = 1.f)
+                : label(label), weight(weight)
+            {}
+
+            // In semantic segmentation, 65536 classes ought to be enough for anybody.
+            uint16_t label = 0;
+            float weight = 1.f;
+        };
+
+        typedef matrix<weighted_label> training_label_type;
+        typedef matrix<uint16_t> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        static void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        )
+        {
+            loss_multiclass_log_per_pixel_::to_label(input_tensor, sub, iter);
+        }
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const
+        {
+            const tensor& output_tensor = sub.get_output();
+            tensor& grad = sub.get_gradient_input();
+
+            DLIB_CASSERT(sub.sample_expansion_factor() == 1);
+            DLIB_CASSERT(input_tensor.num_samples() != 0);
+            DLIB_CASSERT(input_tensor.num_samples() % sub.sample_expansion_factor() == 0);
+            DLIB_CASSERT(input_tensor.num_samples() == grad.num_samples());
+            DLIB_CASSERT(input_tensor.num_samples() == output_tensor.num_samples());
+            DLIB_CASSERT(output_tensor.k() >= 1);
+            DLIB_CASSERT(output_tensor.k() < std::numeric_limits<uint16_t>::max());
+            DLIB_CASSERT(output_tensor.nr() == grad.nr() &&
+                         output_tensor.nc() == grad.nc() &&
+                         output_tensor.k() == grad.k());
+            for (long idx = 0; idx < output_tensor.num_samples(); ++idx)
+            {
+                const_label_iterator truth_matrix_ptr = (truth + idx);
+                DLIB_CASSERT(truth_matrix_ptr->nr() == output_tensor.nr() &&
+                             truth_matrix_ptr->nc() == output_tensor.nc(),
+                             "truth size = " << truth_matrix_ptr->nr() << " x " << truth_matrix_ptr->nc() << ", "
+                             "output size = " << output_tensor.nr() << " x " << output_tensor.nc());
+            }
+
+            tt::softmax(grad, output_tensor);
+
+            // The loss we output is the weighted average loss over the mini-batch, and also
+            // over each element of the matrix output.
+            const double scale = 1.0 / (output_tensor.num_samples() * output_tensor.nr() * output_tensor.nc());
+            double loss = 0;
+            float* const g = grad.host();
+            for (long i = 0; i < output_tensor.num_samples(); ++i, ++truth)
+            {
+                for (long r = 0; r < output_tensor.nr(); ++r)
+                {
+                    for (long c = 0; c < output_tensor.nc(); ++c)
+                    {
+                        const weighted_label& weighted_label = truth->operator()(r, c);
+                        const uint16_t y = weighted_label.label;
+                        const float weight = weighted_label.weight;
+                        // The network must produce a number of outputs that is equal to the number
+                        // of labels when using this type of loss.
+                        DLIB_CASSERT(static_cast<long>(y) < output_tensor.k() || weight == 0.f,
+                                     "y: " << y << ", output_tensor.k(): " << output_tensor.k());
+                        for (long k = 0; k < output_tensor.k(); ++k)
+                        {
+                            const size_t idx = tensor_index(output_tensor, i, r, c, k);
+                            if (k == y)
+                            {
+                                loss += weight*scale*-std::log(g[idx]);
+                                g[idx] = weight*scale*(g[idx] - 1);
+                            }
+                            else
+                            {
+                                g[idx] = weight*scale*g[idx];
+                            }
+                        }
+                    }
+                }
+            }
+            return loss;
+        }
+
+        friend void serialize(const loss_multiclass_log_per_pixel_weighted_&, std::ostream& out)
+        {
+            serialize("loss_multiclass_log_per_pixel_weighted_", out);
+        }
+
+        friend void deserialize(loss_multiclass_log_per_pixel_weighted_&, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "loss_multiclass_log_per_pixel_weighted_")
+                throw serialization_error("Unexpected version found while deserializing dlib::loss_multiclass_log_per_pixel_weighted_.");
+        }
+
+        friend std::ostream& operator<<(std::ostream& out, const loss_multiclass_log_per_pixel_weighted_&)
+        {
+            out << "loss_multiclass_log_per_pixel_weighted";
+            return out;
+        }
+
+        friend void to_xml(const loss_multiclass_log_per_pixel_weighted_& /*item*/, std::ostream& out)
+        {
+            out << "<loss_multiclass_log_per_pixel_weighted/>";
+        }
+
+    private:
+        static size_t tensor_index(const tensor& t, long sample, long row, long column, long k)
+        {
+            // See: https://github.com/davisking/dlib/blob/4dfeb7e186dd1bf6ac91273509f687293bd4230a/dlib/dnn/tensor_abstract.h#L38
+            return ((sample * t.k() + k) * t.nr() + row) * t.nc() + column;
+        }
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;

 // ----------------------------------------------------------------------------------------

 }
...
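Note: the arithmetic in compute_loss_value_and_gradient above is a per-pixel softmax log loss in which both the loss term and the gradient of each pixel are multiplied by that pixel's weight (and by the mini-batch/pixel averaging factor, scale). Below is a minimal, dlib-free sketch of that computation for a single pixel; the logit values, label, and weight are made up for illustration.

#include <cmath>
#include <cstdio>
#include <vector>

// Sketch of the per-pixel weighted log loss above, for one pixel.
// "logits" stands in for one column of sub.get_output(); y and weight come
// from that pixel's weighted_label; scale would be
// 1.0/(num_samples*nr*nc) inside the loss layer.
int main()
{
    const std::vector<double> logits = {0.5, 2.0, -1.0};  // output_tensor.k() == 3 classes
    const unsigned short y = 1;                           // ground-truth label
    const double weight = 1.1;                            // per-pixel weight
    const double scale = 1.0;                             // averaging factor

    // Softmax over the class dimension, as tt::softmax produces per pixel.
    std::vector<double> p(logits.size());
    double sum = 0;
    for (size_t k = 0; k < logits.size(); ++k) sum += std::exp(logits[k]);
    for (size_t k = 0; k < logits.size(); ++k) p[k] = std::exp(logits[k]) / sum;

    // Weighted negative log-likelihood and its gradient w.r.t. the logits:
    // g[k] = weight*scale*(p[k] - 1) for k == y, else weight*scale*p[k].
    const double loss = weight * scale * -std::log(p[y]);
    for (size_t k = 0; k < p.size(); ++k)
    {
        const double g = weight * scale * (p[k] - (k == y ? 1.0 : 0.0));
        std::printf("g[%zu] = %+.4f\n", k, g);
    }
    std::printf("loss = %.4f\n", loss);
}

Setting weight to 0 makes both the loss term and the gradient vanish for that pixel, which is exactly the special case the documentation below points out.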
dlib/dnn/loss_abstract.h

@@ -863,6 +863,94 @@ namespace dlib
     template <typename SUBNET>
     using loss_multiclass_log_per_pixel = add_loss_layer<loss_multiclass_log_per_pixel_, SUBNET>;

+// ----------------------------------------------------------------------------------------
+
+    class loss_multiclass_log_per_pixel_weighted_
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This object implements the loss layer interface defined above by
+                EXAMPLE_LOSS_LAYER_.  In particular, it implements the multiclass logistic
+                regression loss (e.g. negative log-likelihood loss), which is appropriate
+                for multiclass classification problems.  It is basically just like
+                loss_multiclass_log_per_pixel_ except that it lets you define per-pixel
+                weights, which may be useful e.g. if you want to emphasize rare classes
+                while training.  (If the classification problem is difficult, a flat weight
+                structure may lead the network to always predict the most common label, in
+                particular if the degree of imbalance is high.  To emphasize a certain
+                class or classes, simply increase the weights of the corresponding pixels,
+                relative to the weights of the other pixels.)
+
+                Note that if you set the weight to 0 whenever a pixel's label is equal to
+                loss_multiclass_log_per_pixel_::label_to_ignore, and to 1 otherwise, then
+                you essentially get loss_multiclass_log_per_pixel_ as a special case.
+        !*/
+
+    public:
+
+        struct weighted_label
+        {
+            /*!
+                WHAT THIS OBJECT REPRESENTS
+                    This object represents the truth label of a single pixel, together with
+                    an associated weight (the higher the weight, the more emphasis the
+                    corresponding pixel is given during the training).
+            !*/
+
+            weighted_label();
+
+            weighted_label(uint16_t label, float weight = 1.f);
+
+            // The ground-truth label. In semantic segmentation, 65536 classes ought to be
+            // enough for anybody.
+            uint16_t label = 0;
+
+            // The weight of the corresponding pixel.
+            float weight = 1.f;
+        };
+
+        typedef matrix<weighted_label> training_label_type;
+        typedef matrix<uint16_t> output_label_type;
+
+        template <
+            typename SUB_TYPE,
+            typename label_iterator
+            >
+        void to_label (
+            const tensor& input_tensor,
+            const SUB_TYPE& sub,
+            label_iterator iter
+        ) const;
+        /*!
+            This function has the same interface as EXAMPLE_LOSS_LAYER_::to_label() except
+            it has the additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+            and the output label is the predicted class for each classified element.  The
+            number of possible output classes is sub.get_output().k().
+        !*/
+
+        template <
+            typename const_label_iterator,
+            typename SUBNET
+            >
+        double compute_loss_value_and_gradient (
+            const tensor& input_tensor,
+            const_label_iterator truth,
+            SUBNET& sub
+        ) const;
+        /*!
+            This function has the same interface as
+            EXAMPLE_LOSS_LAYER_::compute_loss_value_and_gradient() except it has the
+            additional calling requirements that:
+                - sub.get_output().num_samples() == input_tensor.num_samples()
+                - sub.sample_expansion_factor() == 1
+                - all labels pointed to by truth are < sub.get_output().k(), or the
+                  corresponding weight is zero.
+        !*/
+    };
+
+    template <typename SUBNET>
+    using loss_multiclass_log_per_pixel_weighted = add_loss_layer<loss_multiclass_log_per_pixel_weighted_, SUBNET>;

 // ----------------------------------------------------------------------------------------

 }
...
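Note: the "special case" remark above can be made concrete. The following usage sketch assumes dlib with this commit applied; the helper make_weighted and its weight values (0 for ignored pixels, 4 for an emphasized class, 1 otherwise) are illustrative choices, not part of the commit.

#include <dlib/dnn.h>
#include <iostream>

using namespace dlib;

// Illustrative helper (not part of this commit): convert an ordinary uint16_t
// label image into a weighted one. Pixels labeled label_to_ignore get weight 0,
// so they contribute nothing to the loss or gradient; class 1 stands in for a
// rare class worth emphasizing.
matrix<loss_multiclass_log_per_pixel_weighted_::weighted_label>
make_weighted(const matrix<uint16_t>& labels)
{
    using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;
    matrix<weighted_label> out(labels.nr(), labels.nc());
    for (long r = 0; r < labels.nr(); ++r)
    {
        for (long c = 0; c < labels.nc(); ++c)
        {
            const uint16_t label = labels(r, c);
            if (label == loss_multiclass_log_per_pixel_::label_to_ignore)
                out(r, c) = weighted_label(label, 0.f); // ignored pixel
            else if (label == 1)
                out(r, c) = weighted_label(label, 4.f); // emphasized class
            else
                out(r, c) = weighted_label(label, 1.f); // ordinary pixel
        }
    }
    return out;
}

int main()
{
    matrix<uint16_t> labels(2, 2);
    labels = 0, 1,
             1, loss_multiclass_log_per_pixel_::label_to_ignore;
    const auto weighted = make_weighted(labels);
    std::cout << "weight at (1,1): " << weighted(1, 1).weight << std::endl; // prints 0
}

With all weights set to 1 except the zeros at ignored pixels, the weighted loss reduces to loss_multiclass_log_per_pixel_, as the documentation states.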
dlib/test/dnn.cpp

@@ -2331,7 +2331,102 @@ namespace
 // ----------------------------------------------------------------------------------------

-    void test_tensor_resize_bilienar(long samps, long k, long nr, long nc, long onr, long onc)
+    void test_loss_multiclass_per_pixel_weighted()
+    {
+        // Train with pixel-specific weights
+        print_spinner();
+
+        constexpr int input_height = 5;
+        constexpr int input_width = 7;
+        constexpr int output_height = input_height;
+        constexpr int output_width = input_width;
+        const int num_samples = 1000;
+        const int num_classes = 6;
+
+        ::std::default_random_engine generator(16);
+        ::std::uniform_real_distribution<double> u01(0.0, 1.0);
+        ::std::uniform_int_distribution<uint16_t> noisy_label(0, num_classes - 1);
+
+        ::std::vector<matrix<double>> x(num_samples);
+        ::std::vector<matrix<uint16_t>> y(num_samples);
+
+        matrix<double> xtmp(input_height, input_width);
+        matrix<uint16_t> ytmp(output_height, output_width);
+
+        // Generate input data
+        for (int ii = 0; ii < num_samples; ++ii)
+        {
+            for (int jj = 0; jj < input_height; ++jj)
+            {
+                for (int kk = 0; kk < input_width; ++kk)
+                {
+                    xtmp(jj, kk) = u01(generator);
+                    ytmp(jj, kk) = noisy_label(generator);
+                }
+            }
+            x[ii] = xtmp;
+            y[ii] = ytmp;
+        }
+
+        using net_type = loss_multiclass_log_per_pixel_weighted<con<num_classes,1,1,1,1,input<matrix<double>>>>;
+        using weighted_label = loss_multiclass_log_per_pixel_weighted_::weighted_label;
+        ::std::vector<matrix<weighted_label>> y_weighted(num_samples);
+
+        for (int weighted_class = 0; weighted_class < num_classes; ++weighted_class)
+        {
+            print_spinner();
+
+            // Assign weights
+            for (int ii = 0; ii < num_samples; ++ii)
+            {
+                if (weighted_class == 0)
+                {
+                    y_weighted[ii].set_size(input_height, input_width);
+                }
+                for (int jj = 0; jj < input_height; ++jj)
+                {
+                    for (int kk = 0; kk < input_width; ++kk)
+                    {
+                        const uint16_t label = y[ii](jj, kk);
+                        const float weight = label == weighted_class ? 1.1f : 0.9f;
+                        y_weighted[ii](jj, kk) = weighted_label(label, weight);
+                    }
+                }
+            }
+
+            net_type net;
+            sgd defsolver(0, 0.9);
+            dnn_trainer<net_type> trainer(net, defsolver);
+            trainer.set_learning_rate(0.1);
+            trainer.set_min_learning_rate(0.01);
+            trainer.set_mini_batch_size(10);
+            trainer.set_max_num_epochs(10);
+            trainer.train(x, y_weighted);
+
+            const ::std::vector<matrix<uint16_t>> predictions = net(x);
+
+            int num_weighted_class = 0;
+            int num_not_weighted_class = 0;
+
+            for (int ii = 0; ii < num_samples; ++ii)
+            {
+                const matrix<uint16_t>& prediction = predictions[ii];
+                DLIB_TEST(prediction.nr() == output_height);
+                DLIB_TEST(prediction.nc() == output_width);
+                for (int jj = 0; jj < output_height; ++jj)
+                    for (int kk = 0; kk < output_width; ++kk)
+                        if (prediction(jj, kk) == weighted_class)
+                            ++num_weighted_class;
+                        else
+                            ++num_not_weighted_class;
+            }
+
+            DLIB_TEST_MSG(num_weighted_class > num_not_weighted_class,
+                          "The weighted class (" << weighted_class << ") does not dominate: "
+                          << num_weighted_class << " <= " << num_not_weighted_class);
+        }
+    }
+
+// ----------------------------------------------------------------------------------------
+
+    void test_tensor_resize_bilinear(long samps, long k, long nr, long nc, long onr, long onc)
     {
         resizable_tensor img(samps, k, nr, nc);
         resizable_tensor out(samps, k, onr, onc);

@@ -2426,9 +2521,9 @@ namespace
         compare_adam();
         test_copy_tensor_gpu();
 #endif
-        test_tensor_resize_bilienar(2, 3, 6, 6, 11, 11);
-        test_tensor_resize_bilienar(2, 3, 6, 6, 3, 4);
-        test_tensor_resize_bilienar(2, 3, 5, 6, 12, 21);
+        test_tensor_resize_bilinear(2, 3, 6, 6, 11, 11);
+        test_tensor_resize_bilinear(2, 3, 6, 6, 3, 4);
+        test_tensor_resize_bilinear(2, 3, 5, 6, 12, 21);
         test_max_pool(1, 1, 2, 3, 0, 0);
         test_max_pool(3, 3, 1, 1, 0, 0);
         test_max_pool(3, 3, 2, 2, 0, 0);

@@ -2469,6 +2564,7 @@ namespace
         test_loss_multiclass_per_pixel_activations_on_trivial_single_pixel_task();
         test_loss_multiclass_per_pixel_outputs_on_trivial_task();
         test_loss_multiclass_per_pixel_with_noise_and_pixels_to_ignore();
+        test_loss_multiclass_per_pixel_weighted();
     }

     void perform_test()
...
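Note: the final assertion (num_weighted_class > num_not_weighted_class) rests on a small piece of arithmetic. The labels are drawn uniformly at random, independently of the input, so the best a 1x1 convolution can do is learn an essentially constant class distribution p; its expected per-pixel loss is -(1/num_classes) * sum_y w_y * log(p_y), which is minimized at p_y = w_y / sum(w). That closed form is a standard constrained-minimization result stated here as background, not something taken from the commit. With weights 1.1 vs 0.9 the weighted class therefore carries the largest probability and wins every argmax. A standalone sketch of that calculation:

#include <algorithm>
#include <cstdio>
#include <vector>

// The test weights one of 6 uniformly distributed classes 1.1f and the rest
// 0.9f. The loss-minimizing constant prediction is p_y = w_y / sum(w), so the
// weighted class gets the largest probability and dominates the predictions.
int main()
{
    std::vector<double> w(6, 0.9);
    w[2] = 1.1;  // the currently weighted class (index arbitrary)

    double total = 0;
    for (double v : w) total += v;

    std::vector<double> p(w.size());
    for (size_t y = 0; y < w.size(); ++y) p[y] = w[y] / total;

    const size_t argmax =
        static_cast<size_t>(std::max_element(p.begin(), p.end()) - p.begin());
    for (size_t y = 0; y < p.size(); ++y)
        std::printf("p[%zu] = %.4f\n", y, p[y]);
    std::printf("argmax = %zu (the weighted class)\n", argmax);
}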
setup.py

@@ -526,7 +526,11 @@ class build(_build):
             # this checks the sysconfig and will correctly pick up a brewed python lib
             # e.g. in /usr/local/Cellar
             py_ver = get_python_version()
+            # check: in some virtual environments the libpython has the form "libpython_#m.dylib
             py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython' + py_ver + '.dylib')
+            if not os.path.isfile(py_lib):
+                py_lib = os.path.join(get_config_var('LIBDIR'), 'libpython' + py_ver + 'm.dylib')
             cmake_extra_arch += ['-DPYTHON_LIBRARY={lib}'.format(lib=py_lib)]
         if sys.platform == "win32":
...