// Copyright (C) 2013 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_fHOG_H__ #define DLIB_fHOG_H__ #include "fhog_abstract.h" #include "../matrix.h" #include "../array2d.h" #include "../array.h" #include "../geometry.h" #include "assign_image.h" #include "draw.h" #include "interpolation.h" namespace dlib { // ---------------------------------------------------------------------------------------- namespace impl_fhog { template inline typename dlib::enable_if_c::rgb>::type get_gradient ( const int r, const int c, const image_type& img, matrix& grad, double& len ) { matrix grad2, grad3; // get the red gradient grad = (int)img[r][c+1].red-(int)img[r][c-1].red, (int)img[r+1][c].red-(int)img[r-1][c].red; len = length_squared(grad); // get the green gradient grad2 = (int)img[r][c+1].green-(int)img[r][c-1].green, (int)img[r+1][c].green-(int)img[r-1][c].green; double v2 = length_squared(grad2); // get the blue gradient grad3 = (int)img[r][c+1].blue-(int)img[r][c-1].blue, (int)img[r+1][c].blue-(int)img[r-1][c].blue; double v3 = length_squared(grad3); // pick color with strongest gradient if (v2 > len) { len = v2; grad = grad2; } if (v3 > len) { len = v3; grad = grad3; } } // ------------------------------------------------------------------------------------ template inline typename dlib::disable_if_c::rgb>::type get_gradient ( const int r, const int c, const image_type& img, matrix& grad, double& len ) { grad = (int)get_pixel_intensity(img[r][c+1])-(int)get_pixel_intensity(img[r][c-1]), (int)get_pixel_intensity(img[r+1][c])-(int)get_pixel_intensity(img[r-1][c]); len = length_squared(grad); } // ------------------------------------------------------------------------------------ template void set_hog ( dlib::array,mm2>& hog, int o, int x, int y, const double& value ) { hog[o][y][x] = value; } template void init_hog ( dlib::array,mm2>& hog, int hog_nr, int hog_nc ) { const int num_hog_bands = 27+4; hog.resize(num_hog_bands); for (int i = 0; i < num_hog_bands; ++i) { hog[i].set_size(hog_nr, hog_nc); } } // ------------------------------------------------------------------------------------ template void set_hog ( array2d,mm>& hog, int o, int x, int y, const double& value ) { hog[y][x](o) = value; } template void init_hog ( array2d,mm>& hog, int hog_nr, int hog_nc ) { hog.set_size(hog_nr, hog_nc); } // ------------------------------------------------------------------------------------ template < typename image_type, typename out_type > void impl_extract_fhog_features( const image_type& img, out_type& hog, int cell_size ) { /* This function implements the HOG feature extraction method described in the paper: P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan Object Detection with Discriminatively Trained Part Based Models IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010 Moreover, this function is derived from the HOG feature extraction code from the features.cc file in the voc-releaseX code (see http://people.cs.uchicago.edu/~rbg/latent/) which is has the following license (note that the code has been modified to work with grayscale and color as well as planar and interlaced input and output formats): Copyright (C) 2011, 2012 Ross Girshick, Pedro Felzenszwalb Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick Copyright (C) 2007 Pedro Felzenszwalb, Deva Ramanan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ // unit vectors used to compute gradient orientation matrix directions[9]; directions[0] = 1.0000, 0.0000; directions[1] = 0.9397, 0.3420; directions[2] = 0.7660, 0.6428; directions[3] = 0.500, 0.8660; directions[4] = 0.1736, 0.9848; directions[5] = -0.1736, 0.9848; directions[6] = -0.5000, 0.8660; directions[7] = -0.7660, 0.6428; directions[8] = -0.9397, 0.3420; // First we allocate memory for caching orientation histograms & their norms. const int cells_nr = (int)((double)img.nr()/(double)cell_size + 0.5); const int cells_nc = (int)((double)img.nc()/(double)cell_size + 0.5); if (cells_nr == 0 || cells_nc == 0) { hog.clear(); return; } array2d > hist(cells_nr, cells_nc); for (long r = 0; r < hist.nr(); ++r) { for (long c = 0; c < hist.nc(); ++c) { hist[r][c] = 0; } } array2d norm(cells_nr, cells_nc); assign_all_pixels(norm, 0); // memory for HOG features const int hog_nr = std::max(cells_nr-2, 0); const int hog_nc = std::max(cells_nc-2, 0); if (hog_nr == 0 || hog_nc == 0) { hog.clear(); return; } init_hog(hog, hog_nr, hog_nc); const int visible_nr = cells_nr*cell_size; const int visible_nc = cells_nc*cell_size; // First populate the gradient histograms for (int y = 1; y < visible_nr-1; y++) { for (int x = 1; x < visible_nc-1; x++) { const int r = std::min(y, img.nr()-2); const int c = std::min(x, img.nc()-2); matrix grad; double v; get_gradient(r,c,img,grad,v); v = std::sqrt(v); // snap to one of 18 orientations double best_dot = 0; int best_o = 0; for (int o = 0; o < 9; o++) { const double dot = dlib::dot(directions[o], grad); if (dot > best_dot) { best_dot = dot; best_o = o; } else if (-dot > best_dot) { best_dot = -dot; best_o = o+9; } } // add to 4 histograms around pixel using bilinear interpolation double xp = ((double)x+0.5)/(double)cell_size - 0.5; double yp = ((double)y+0.5)/(double)cell_size - 0.5; int ixp = (int)std::floor(xp); int iyp = (int)std::floor(yp); double vx0 = xp-ixp; double vy0 = yp-iyp; double vx1 = 1.0-vx0; double vy1 = 1.0-vy0; if (ixp >= 0 && iyp >= 0) hist[iyp][ixp](best_o) += vy1*vx1*v; if (iyp+1 < cells_nr && ixp >= 0) hist[iyp+1][ixp](best_o) += vy0*vx1*v; if (iyp >= 0 && ixp+1 < cells_nc) hist[iyp][ixp+1](best_o) += vy1*vx0*v; if (ixp+1 < cells_nc && iyp+1 < cells_nr) hist[iyp+1][ixp+1](best_o) += vy0*vx0*v; } } // compute energy in each block by summing over orientations for (int r = 0; r < cells_nr; ++r) { for (int c = 0; c < cells_nc; ++c) { for (int o = 0; o < 9; o++) { norm[r][c] += (hist[r][c](o) + hist[r][c](o+9)) * (hist[r][c](o) + hist[r][c](o+9)); } } } const double eps = 0.0001; // compute features for (int y = 0; y < hog_nr; y++) { for (int x = 0; x < hog_nc; x++) { double n1, n2, n3, n4; n1 = 1.0 / std::sqrt(norm[y+1][x+1] + norm[y+1][x+2] + norm[y+2][x+1] + norm[y+2][x+2] + eps); n2 = 1.0 / std::sqrt(norm[y][x+1] + norm[y][x+2] + norm[y+1][x+1] + norm[y+1][x+2] + eps); n3 = 1.0 / std::sqrt(norm[y+1][x] + norm[y+1][x+1] + norm[y+2][x] + norm[y+2][x+1] + eps); n4 = 1.0 / std::sqrt(norm[y][x] + norm[y][x+1] + norm[y+1][x] + norm[y+1][x+1] + eps); double t1 = 0; double t2 = 0; double t3 = 0; double t4 = 0; // contrast-sensitive features for (int o = 0; o < 18; o++) { double h1 = std::min(hist[y+1][x+1](o) * n1, 0.2); double h2 = std::min(hist[y+1][x+1](o) * n2, 0.2); double h3 = std::min(hist[y+1][x+1](o) * n3, 0.2); double h4 = std::min(hist[y+1][x+1](o) * n4, 0.2); set_hog(hog,o,x,y,0.5 * (h1 + h2 + h3 + h4)); t1 += h1; t2 += h2; t3 += h3; t4 += h4; } // contrast-insensitive features for (int o = 0; o < 9; o++) { double sum = hist[y+1][x+1](o) + hist[y+1][x+1](o+9); double h1 = std::min(sum * n1, 0.2); double h2 = std::min(sum * n2, 0.2); double h3 = std::min(sum * n3, 0.2); double h4 = std::min(sum * n4, 0.2); set_hog(hog,o+18,x,y, 0.5 * (h1 + h2 + h3 + h4)); } // texture features set_hog(hog,27,x,y, 0.2357 * t1); set_hog(hog,28,x,y, 0.2357 * t2); set_hog(hog,29,x,y, 0.2357 * t3); set_hog(hog,30,x,y, 0.2357 * t4); } } } // ------------------------------------------------------------------------------------ inline void create_fhog_bar_images ( dlib::array >& mbars, const long w ) { const long bdims = 9; // Make the oriented lines we use to draw on each HOG cell. mbars.resize(bdims); dlib::array > bars(bdims); array2d temp(w,w); for (unsigned long i = 0; i < bars.size(); ++i) { assign_all_pixels(temp, 0); draw_line(temp, point(w/2,0), point(w/2,w-1), 255); rotate_image(temp, bars[i], i*-pi/bars.size()); mbars[i] = subm(matrix_cast(mat(bars[i])), centered_rect(get_rect(bars[i]),w,w) ); } } } // end namespace impl_fhog // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename image_type, typename T, typename mm1, typename mm2 > void extract_fhog_features( const image_type& img, dlib::array,mm2>& hog, int cell_size = 8 ) { return impl_fhog::impl_extract_fhog_features(img, hog, cell_size); } template < typename image_type, typename T, typename mm > void extract_fhog_features( const image_type& img, array2d,mm>& hog, int cell_size = 8 ) { return impl_fhog::impl_extract_fhog_features(img, hog, cell_size); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- inline point image_to_fhog ( point p, int cell_size = 8 ) { // There is a one pixel border around the imag. p -= point(1,1); // There is also a 1 "cell" border around the HOG image formation. return p/cell_size - point(1,1); } // ---------------------------------------------------------------------------------------- inline rectangle image_to_fhog ( const rectangle& rect, int cell_size = 8 ) { return rectangle(image_to_fhog(rect.tl_corner(),cell_size), image_to_fhog(rect.br_corner(),cell_size)); } // ---------------------------------------------------------------------------------------- inline point fhog_to_image ( point p, int cell_size = 8 ) { // Convert to image space and then set to the center of the cell. point offset; if (p.x() >= 0 && p.y() >= 0) offset = point(cell_size/2,cell_size/2); if (p.x() < 0 && p.y() >= 0) offset = point(-cell_size/2,cell_size/2); if (p.x() >= 0 && p.y() < 0) offset = point(cell_size/2,-cell_size/2); if (p.x() < 0 && p.y() < 0) offset = point(-cell_size/2,-cell_size/2); return (p+point(1,1))*cell_size + point(1,1) + offset; } // ---------------------------------------------------------------------------------------- inline rectangle fhog_to_image ( const rectangle& rect, int cell_size = 8 ) { return rectangle(fhog_to_image(rect.tl_corner(),cell_size), fhog_to_image(rect.br_corner(),cell_size)); } // ---------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------- template < typename T, typename mm1, typename mm2 > matrix draw_fhog( const dlib::array,mm2>& hog, const long w = 15 ) { dlib::array > mbars; impl_fhog::create_fhog_bar_images(mbars,w); // now draw the bars onto the HOG cells matrix himg(hog[0].nr()*w, hog[0].nc()*w); himg = 0; for (unsigned long d = 0; d < mbars.size(); ++d) { for (long r = 0; r < himg.nr(); r+=w) { for (long c = 0; c < himg.nc(); c+=w) { const float val = hog[d][r/w][c/w] + hog[d+mbars.size()][r/w][c/w] + hog[d+mbars.size()*2][r/w][c/w]; if (val > 0) { set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()]; } } } } const double thresh = mean(himg) + 4*stddev(himg); return matrix_cast(upperbound(round(himg*255/thresh),255)); } // ---------------------------------------------------------------------------------------- template < typename T, typename mm > matrix draw_fhog( const array2d,mm>& hog, const long w = 15 ) { dlib::array > mbars; impl_fhog::create_fhog_bar_images(mbars,w); // now draw the bars onto the HOG cells matrix himg(hog.nr()*w, hog.nc()*w); himg = 0; for (unsigned long d = 0; d < mbars.size(); ++d) { for (long r = 0; r < himg.nr(); r+=w) { for (long c = 0; c < himg.nc(); c+=w) { const float val = hog[r/w][c/w](d) + hog[r/w][c/w](d+mbars.size()) + hog[r/w][c/w](d+mbars.size()*2); if (val > 0) { set_subm(himg, r, c, w, w) += val*mbars[d%mbars.size()]; } } } } const double thresh = mean(himg) + 4*stddev(himg); return matrix_cast(upperbound(round(himg*255/thresh),255)); } // ---------------------------------------------------------------------------------------- } #endif // DLIB_fHOG_H__