Added input_rgb_image.

7fdcfb0b · Davis King · 3165ff28 · 7fdcfb0b · 7fdcfb0b
Commit 7fdcfb0b authored Apr 02, 2016 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing with 186 additions and 0 deletions

dlib/dnn/input.h dlib/dnn/input.h +103 -0

dlib/dnn/input_abstract.h dlib/dnn/input_abstract.h +83 -0

No files found.
--- a/dlib/dnn/input.h
+++ b/dlib/dnn/input.h
@@ -22,6 +22,109 @@ namespace dlib
            "dlib::matrix and dlib::array2d objects."); 
    };

+// ----------------------------------------------------------------------------------------
+
+    class input_rgb_image
+    {
+    public:
+        typedef matrix<rgb_pixel> input_type;
+        const static unsigned int sample_expansion_factor = 1;
+
+        input_rgb_image (
+        ) : 
+            avg_red(122.782), 
+            avg_green(117.001),
+            avg_blue(104.298) 
+        {
+        }
+
+        input_rgb_image (
+            float avg_red_,
+            float avg_green_,
+            float avg_blue_
+        ) : avg_red(avg_red_), avg_green(avg_green_), avg_blue(avg_blue_) 
+        {}
+
+        float get_avg_red()   const { return avg_red; }
+        float get_avg_green() const { return avg_green; }
+        float get_avg_blue()  const { return avg_blue; }
+
+        template <typename input_iterator>
+        void to_tensor (
+            input_iterator ibegin,
+            input_iterator iend,
+            resizable_tensor& data
+        ) const
+        {
+            DLIB_CASSERT(std::distance(ibegin,iend) > 0,"");
+            const auto nr = ibegin->nr();
+            const auto nc = ibegin->nc();
+            // make sure all the input matrices have the same dimensions
+            for (auto i = ibegin; i != iend; ++i)
+            {
+                DLIB_CASSERT(i->nr()==nr && i->nc()==nc,
+                    "\t input_rgb_image::to_tensor()"
+                    << "\n\t All matrices given to to_tensor() must have the same dimensions."
+                    << "\n\t nr: " << nr
+                    << "\n\t nc: " << nc
+                    << "\n\t i->nr(): " << i->nr()
+                    << "\n\t i->nc(): " << i->nc()
+                );
+            }
+
+            
+            // initialize data to the right size to contain the stuff in the iterator range.
+            data.set_size(std::distance(ibegin,iend), 3, nr, nc);
+
+
+            const size_t offset = nr*nc;
+            auto ptr = data.host();
+            for (auto i = ibegin; i != iend; ++i)
+            {
+                for (long r = 0; r < nr; ++r)
+                {
+                    for (long c = 0; c < nc; ++c)
+                    {
+                        rgb_pixel temp = (*i)(r,c);
+                        auto p = ptr++;
+                        *p = (temp.red-avg_red)/256.0; 
+                        p += offset;
+                        *p = (temp.green-avg_green)/256.0; 
+                        p += offset;
+                        *p = (temp.blue-avg_blue)/256.0; 
+                        p += offset;
+                    }
+                }
+                ptr += offset*(data.k()-1);
+            }
+
+        }
+
+        friend void serialize(const input_rgb_image& item, std::ostream& out)
+        {
+            serialize("input_rgb_image", out);
+            serialize(item.avg_red, out);
+            serialize(item.avg_green, out);
+            serialize(item.avg_blue, out);
+        }
+
+        friend void deserialize(input_rgb_image& item, std::istream& in)
+        {
+            std::string version;
+            deserialize(version, in);
+            if (version != "input_rgb_image")
+                throw serialization_error("Unexpected version found while deserializing dlib::input_rgb_image.");
+            deserialize(item.avg_red, in);
+            deserialize(item.avg_green, in);
+            deserialize(item.avg_blue, in);
+        }
+
+    private:
+        float avg_red;
+        float avg_green;
+        float avg_blue;
+    };
+
 // ----------------------------------------------------------------------------------------

    template <typename T, long NR, long NC, typename MM, typename L>

--- a/dlib/dnn/input_abstract.h
+++ b/dlib/dnn/input_abstract.h
@@ -152,6 +152,89 @@ namespace dlib

 // ----------------------------------------------------------------------------------------

+    class input_rgb_image
+    {
+        /*!
+            WHAT THIS OBJECT REPRESENTS
+                This input layer works with RGB images of type matrix<rgb_pixel>.  It is
+                very similar to the dlib::input layer except that it allows you to subtract
+                the average color value from each color channel when convting an image to a
+                tensor.
+        !*/
+    public:
+        typedef matrix<rgb_pixel> input_type;
+        const static unsigned int sample_expansion_factor = 1;
+
+        input_rgb_image (
+        );
+        /*!
+            ensures
+                - #get_avg_red()   == 122.782
+                - #get_avg_green() == 117.001
+                - #get_avg_blue()  == 104.298
+        !*/
+
+        input_rgb_image (
+            float avg_red,
+            float avg_green,
+            float avg_blue
+        ); 
+        /*!
+            ensures
+                - #get_avg_red() == avg_red
+                - #get_avg_green() == avg_green
+                - #get_avg_blue() == avg_blue
+        !*/
+
+        float get_avg_red(
+        ) const;
+        /*!
+            ensures
+                - returns the value subtracted from the red color channel.
+        !*/
+
+        float get_avg_green(
+        ) const;
+        /*!
+            ensures
+                - returns the value subtracted from the green color channel.
+        !*/
+
+        float get_avg_blue(
+        ) const;
+        /*!
+            ensures
+                - returns the value subtracted from the blue color channel.
+        !*/
+
+        template <typename input_iterator>
+        void to_tensor (
+            input_iterator ibegin,
+            input_iterator iend,
+            resizable_tensor& data
+        ) const;
+        /*!
+            requires
+                - [ibegin, iend) is an iterator range over input_type objects.
+                - std::distance(ibegin,iend) > 0
+                - The input range should contain images that all have the same
+                  dimensions.
+            ensures
+                - Converts the iterator range into a tensor and stores it into #data.  In
+                  particular, if the input images have R rows, C columns then we will have:
+                    - #data.num_samples() == std::distance(ibegin,iend)
+                    - #data.nr() == R
+                    - #data.nc() == C
+                    - #data.k() == 3
+                  Moreover, each color channel is normalized by having its average value
+                  subtracted (accroding to get_avg_red(), get_avg_green(), or
+                  get_avg_blue()) and then is divided by 256.0.
+        !*/
+    };
+
+    void serialize(const input_rgb_image& item, std::ostream& out);
+    void deserialize(input_rgb_image& item, std::istream& in);
+
 }

 #endif // DLIB_DNn_INPUT_ABSTRACT_H_