Added more options to control how the feature pool region is defined.

3fff7f84 · Davis King · b6dc0d8e · 3fff7f84 · 3fff7f84
Commit 3fff7f84 authored Sep 15, 2017 by Davis King
2 changed files
--- a/dlib/image_processing/shape_predictor_trainer.h
+++ b/dlib/image_processing/shape_predictor_trainer.h
@@ -21,6 +21,12 @@ namespace dlib
        !*/
    public:

+        enum padding_mode_t
+        {
+            bounding_box_relative,  // sampling region also covers the box [0,1]x[0,1]
+            landmark_relative       // sampling region is fit to the landmarks only (the default)
+        };
+
        shape_predictor_trainer (
        )
        {
@@ -35,6 +41,7 @@ namespace dlib
            _feature_pool_region_padding = 0;
            _verbose = false;
            _num_threads = 0;
+            _padding_mode = landmark_relative;
        }

        unsigned long get_cascade_depth (
@@ -165,6 +172,15 @@ namespace dlib
            _num_test_splits = num;
        }

+        void set_padding_mode (
+            padding_mode_t mode  // bounding_box_relative or landmark_relative
+        )
+        {
+            _padding_mode = mode;  // stored as-is; read back through get_padding_mode()
+        }
+
+        padding_mode_t get_padding_mode (
+        ) const { return _padding_mode; }  // landmark_relative unless set_padding_mode() changed it

        double get_feature_pool_region_padding (
        ) const { return _feature_pool_region_padding; }
@@ -172,6 +188,12 @@ namespace dlib
            double padding 
        )
        {
+            DLIB_CASSERT(padding > -0.5,
+                "\t void shape_predictor_trainer::set_feature_pool_region_padding()"
+                << "\n\t Invalid inputs were given to this function. "
+                << "\n\t padding: " << padding 
+            );
+
            _feature_pool_region_padding = padding;
        }

@@ -722,10 +744,23 @@ namespace dlib
            // Figure out the bounds on the object shapes.  We will sample uniformly
            // from this box.
            matrix<float> temp = reshape(initial_shape, initial_shape.size()/2, 2);
-            const double min_x = min(colm(temp,0))-padding;
-            const double min_y = min(colm(temp,1))-padding;
-            const double max_x = max(colm(temp,0))+padding;
-            const double max_y = max(colm(temp,1))+padding;
+            double min_x = min(colm(temp,0));  // unpadded: padding is applied exactly once, below
+            double min_y = min(colm(temp,1));
+            double max_x = max(colm(temp,0));
+            double max_y = max(colm(temp,1));
+
+            if (get_padding_mode() == bounding_box_relative)
+            {
+                min_x = std::min(0.0, min_x);  // also cover the box [0,1]x[0,1]
+                min_y = std::min(0.0, min_y);
+                max_x = std::max(1.0, max_x);
+                max_y = std::max(1.0, max_y);
+            }
+
+            min_x -= padding;  // pad each side of the final box (negative values shrink it)
+            min_y -= padding;
+            max_x += padding;
+            max_y += padding;

            std::vector<std::vector<dlib::vector<float,2> > > pixel_coordinates;
            pixel_coordinates.resize(get_cascade_depth());
@@ -749,6 +784,7 @@ namespace dlib
        double _feature_pool_region_padding;
        bool _verbose;
        unsigned long _num_threads;
+        padding_mode_t _padding_mode;
    };

 // ----------------------------------------------------------------------------------------

--- a/dlib/image_processing/shape_predictor_trainer_abstract.h
+++ b/dlib/image_processing/shape_predictor_trainer_abstract.h
@@ -38,6 +38,7 @@ namespace dlib
                - #get_feature_pool_region_padding() == 0
                - #get_random_seed() == ""
                - #get_num_threads() == 0
+                - #get_padding_mode() == landmark_relative 
                - This object will not be verbose
        !*/

@@ -180,16 +181,53 @@ namespace dlib
                - #get_feature_pool_size() == size
        !*/

+        enum padding_mode_t
+        {
+            bounding_box_relative,  // region box contains BOTH the landmarks and the bounding box
+            landmark_relative       // region box is the tightest box containing the landmarks
+        };
+
+        padding_mode_t get_padding_mode (
+        ) const; 
+        /*!
+            ensures
+                - returns the current padding mode.  See get_feature_pool_region_padding()
+                  for a discussion of the modes.
+        !*/
+
+        void set_padding_mode (
+            padding_mode_t mode
+        );
+        /*!
+            ensures
+                - #get_padding_mode() == mode
+        !*/
+
        double get_feature_pool_region_padding (
        ) const; 
        /*!
            ensures
-                - When we randomly sample the pixels for the feature pool we do so in a box
-                  fit around the provided training landmarks.  By default, this box is the
-                  tightest box that contains the landmarks (i.e. this is what happens when
-                  get_feature_pool_region_padding()==0).  However, you can expand or shrink
-                  the size of the pixel sampling region by setting a different value of
-                  get_feature_pool_region_padding().  
+                - This algorithm works by comparing the relative intensity of pairs of
+                  pixels in the input image.  To decide which pixels to look at, the
+                  training algorithm randomly selects pixels from a box roughly centered
+                  around the object of interest.  We call this box the feature pool region
+                  box.  
+                  
+                  Each object of interest is defined by a full_object_detection, which
+                  contains a bounding box and a list of landmarks.  If
+                  get_padding_mode()==landmark_relative then the feature pool region box is
+                  the tightest box that contains the landmarks inside the
+                  full_object_detection.  In this mode the full_object_detection's bounding
+                  box is ignored.  Otherwise, if the padding mode is bounding_box_relative
+                  then the feature pool region box is the tightest box that contains BOTH
+                  the landmarks and the full_object_detection's bounding box.
+
+                  Additionally, you can adjust the size of the feature pool region box
+                  by setting get_feature_pool_region_padding() to some value.  If
+                  get_feature_pool_region_padding()==0 then the feature pool region box is
+                  unmodified and defined exactly as stated above. However, you can expand
+                  the size of the box by setting the padding > 0 or shrink it by setting it
+                  to something < 0.

                  To explain this precisely, for a padding of 0 we say that the pixels are
                  sampled from a box of size 1x1.  The padding value is added to each side
@@ -203,11 +241,12 @@ namespace dlib
            double padding 
        );
        /*!
+            requires
+                - padding > -0.5
            ensures
                - #get_feature_pool_region_padding() == padding
        !*/

-
        double get_lambda (
        ) const;
        /*!