load_image_dataset.h 18 KB
Newer Older
1
2
// Copyright (C) 2012  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
3
4
#ifndef DLIB_LOAD_IMAGE_DaTASET_Hh_
#define DLIB_LOAD_IMAGE_DaTASET_Hh_
5
6
7
8
9
10
11
12
13
14

#include "load_image_dataset_abstract.h"
#include "../misc_api.h"
#include "../dir_nav.h"
#include "../image_io.h"
#include "../array.h"
#include <vector>
#include "../geometry.h"
#include "image_dataset_metadata.h"
#include <string>
15
#include <set>
16
#include "../image_processing/full_object_detection.h"
17
#include <utility>
18
19
#include <limits>
#include "../image_transforms/image_pyramid.h"
20
21
22
23
24


namespace dlib
{

25
26
27
28
29
30
31
32
33
34
// ----------------------------------------------------------------------------------------

    class image_dataset_file
    {
    public:
        image_dataset_file(const std::string& filename)
        {
            _skip_empty_images = false;
            _have_parts = false;
            _filename = filename;
35
            _box_area_thresh = std::numeric_limits<double>::infinity();
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
        }

        image_dataset_file boxes_match_label(
            const std::string& label
        ) const
        {
            image_dataset_file temp(*this);
            temp._labels.insert(label);
            return temp;
        }

        image_dataset_file skip_empty_images(
        ) const
        {
            image_dataset_file temp(*this);
            temp._skip_empty_images = true;
            return temp;
        }

        image_dataset_file boxes_have_parts(
        ) const
        {
            image_dataset_file temp(*this);
            temp._have_parts = true;
            return temp;
        }

63
64
65
66
67
68
69
70
71
        image_dataset_file shrink_big_images(
            double new_box_area_thresh = 150*150
        ) const
        {
            image_dataset_file temp(*this);
            temp._box_area_thresh = new_box_area_thresh;
            return temp;
        }

72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
        bool should_load_box (
            const image_dataset_metadata::box& box
        ) const
        {
            if (_have_parts && box.parts.size() == 0)
                return false;
            if (_labels.size() == 0)
                return true;
            if (_labels.count(box.label) != 0)
                return true;
            return false;
        }

        const std::string& get_filename() const { return _filename; }
        bool should_skip_empty_images() const { return _skip_empty_images; }
        bool should_boxes_have_parts() const { return _have_parts; }
88
        double box_area_thresh() const { return _box_area_thresh; }
89
90
91
92
93
94
95
        const std::set<std::string>& get_selected_box_labels() const { return _labels; }

    private:
        std::string _filename;
        std::set<std::string> _labels;
        bool _skip_empty_images;
        bool _have_parts;
96
97
        double _box_area_thresh;

98
99
    };

100
101
102
// ----------------------------------------------------------------------------------------

    template <
103
        typename array_type
104
        >
105
    std::vector<std::vector<rectangle> > load_image_dataset (
106
        array_type& images,
107
        std::vector<std::vector<rectangle> >& object_locations,
108
        const image_dataset_file& source
109
110
111
112
113
    )
    {
        images.clear();
        object_locations.clear();

114
115
        std::vector<std::vector<rectangle> > ignored_rects;

116
117
        using namespace dlib::image_dataset_metadata;
        dataset data;
118
        load_image_dataset_metadata(data, source.get_filename());
119

120
121
122
        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
123
        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
124
125


126
        typedef typename array_type::value_type image_type;
127
128


129
        image_type img;
130
        std::vector<rectangle> rects, ignored;
131
132
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
133
            double min_rect_size = std::numeric_limits<double>::infinity();
134
            rects.clear();
135
            ignored.clear();
136
137
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
138
                if (source.should_load_box(data.images[i].boxes[j]))
139
                {
140
                    if (data.images[i].boxes[j].ignore)
141
                    {
142
                        ignored.push_back(data.images[i].boxes[j].rect);
143
                    }
144
                    else
145
                    {
146
                        rects.push_back(data.images[i].boxes[j].rect);
147
148
                        min_rect_size = std::min<double>(min_rect_size, rects.back().area());
                    }
149
150
                }
            }
151

152
            if (!source.should_skip_empty_images() || rects.size() != 0)
153
154
            {
                load_image(img, data.images[i].filename);
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
                if (rects.size() != 0)  
                {
                    // if shrinking the image would still result in the smallest box being
                    // bigger than the box area threshold then shrink the image.
                    while(min_rect_size/2/2 > source.box_area_thresh())
                    {
                        pyramid_down<2> pyr;
                        pyr(img);
                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
                        for (auto&& r : rects)
                            r = pyr.rect_down(r);
                        for (auto&& r : ignored)
                            r = pyr.rect_down(r);
                    }
                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
                    {
                        pyramid_down<3> pyr;
                        pyr(img);
                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
                        for (auto&& r : rects)
                            r = pyr.rect_down(r);
                        for (auto&& r : ignored)
                            r = pyr.rect_down(r);
                    }
                }
180
                images.push_back(img);
181
182
                object_locations.push_back(rects);
                ignored_rects.push_back(ignored);
183
            }
184
185
        }

186
        return ignored_rects;
187
188
    }

189
190
// ----------------------------------------------------------------------------------------

191
192
    namespace impl
    {
Davis King's avatar
Davis King committed
193
        inline size_t num_non_ignored_boxes (const std::vector<mmod_rect>& rects)
194
195
196
197
198
199
200
201
202
203
204
        {
            size_t cnt = 0;
            for (auto& b : rects)
            {
                if (!b.ignore)
                    cnt++;
            }
            return cnt;
        }
    }

205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
    template <
        typename array_type
        >
    void load_image_dataset (
        array_type& images,
        std::vector<std::vector<mmod_rect> >& object_locations,
        const image_dataset_file& source
    )
    {
        images.clear();
        object_locations.clear();

        using namespace dlib::image_dataset_metadata;
        dataset data;
        load_image_dataset_metadata(data, source.get_filename());

        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));

        typedef typename array_type::value_type image_type;

        image_type img;
        std::vector<mmod_rect> rects;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
232
            double min_rect_size = std::numeric_limits<double>::infinity();
233
234
235
236
237
238
            rects.clear();
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
                if (source.should_load_box(data.images[i].boxes[j]))
                {
                    if (data.images[i].boxes[j].ignore)
239
                    {
240
                        rects.push_back(ignored_mmod_rect(data.images[i].boxes[j].rect));
241
                    }
242
                    else
243
                    {
244
                        rects.push_back(mmod_rect(data.images[i].boxes[j].rect));
245
246
                        min_rect_size = std::min<double>(min_rect_size, rects.back().rect.area());
                    }
247
                    rects.back().label = data.images[i].boxes[j].label;
248

249
250
251
                }
            }

252
            if (!source.should_skip_empty_images() || impl::num_non_ignored_boxes(rects) != 0)
253
254
            {
                load_image(img, data.images[i].filename);
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
                if (rects.size() != 0)  
                {
                    // if shrinking the image would still result in the smallest box being
                    // bigger than the box area threshold then shrink the image.
                    while(min_rect_size/2/2 > source.box_area_thresh())
                    {
                        pyramid_down<2> pyr;
                        pyr(img);
                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
                        for (auto&& r : rects)
                            r.rect = pyr.rect_down(r.rect);
                    }
                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
                    {
                        pyramid_down<3> pyr;
                        pyr(img);
                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
                        for (auto&& r : rects)
                            r.rect = pyr.rect_down(r.rect);
                    }
                }
276
                images.push_back(std::move(img));
277
                object_locations.push_back(std::move(rects));
278
279
280
281
            }
        }
    }

282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
// ----------------------------------------------------------------------------------------

// ******* THIS FUNCTION IS DEPRECATED, you should use another version of load_image_dataset() *******
    // Convenience overload taking the filter options as plain arguments.
    // Builds an image_dataset_file for filename, restricts it to boxes labeled
    // label when label is non-empty, enables skipping of empty images when
    // skip_empty_images is true, and forwards to the image_dataset_file overload.
    // Returns whatever that overload returns.
    template <
        typename image_type, 
        typename MM
        >
    std::vector<std::vector<rectangle> > load_image_dataset (
        array<image_type,MM>& images,
        std::vector<std::vector<rectangle> >& object_locations,
        const std::string& filename,
        const std::string& label,
        bool skip_empty_images = false
    )
    {
        image_dataset_file source(filename);

        // An empty label means "do not filter by label".
        if (!label.empty())
            source = source.boxes_match_label(label);

        if (skip_empty_images)
            source = source.skip_empty_images();

        return load_image_dataset(images, object_locations, source);
    }

305
306
// ----------------------------------------------------------------------------------------

Davis King's avatar
Davis King committed
307
    template <
308
        typename array_type
Davis King's avatar
Davis King committed
309
        >
310
    std::vector<std::vector<rectangle> > load_image_dataset (
311
        array_type& images,
312
313
314
315
        std::vector<std::vector<rectangle> >& object_locations,
        const std::string& filename
    )
    {
316
        return load_image_dataset(images, object_locations, image_dataset_file(filename));
317
318
    }

319
320
321
322
323
324
325
326
327
328
329
330
331
332
// ----------------------------------------------------------------------------------------

    template <
        typename array_type
        >
    void load_image_dataset (
        array_type& images,
        std::vector<std::vector<mmod_rect>>& object_locations,
        const std::string& filename
    )
    {
        load_image_dataset(images, object_locations, image_dataset_file(filename));
    }

333
334
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
335
// ----------------------------------------------------------------------------------------
336
337

    template <
338
        typename array_type
339
        >
340
    std::vector<std::vector<rectangle> > load_image_dataset (
341
        array_type& images,
342
        std::vector<std::vector<full_object_detection> >& object_locations,
343
344
        const image_dataset_file& source,
        std::vector<std::string>& parts_list
345
346
    )
    {
347
        typedef typename array_type::value_type image_type;
348
        parts_list.clear();
349
350
351
        images.clear();
        object_locations.clear();

352
353
        using namespace dlib::image_dataset_metadata;
        dataset data;
354
        load_image_dataset_metadata(data, source.get_filename());
355

356
357
358
        // Set the current directory to be the one that contains the
        // metadata file. We do this because the file might contain
        // file paths which are relative to this folder.
359
        locally_change_current_dir chdir(get_parent_directory(file(source.get_filename())));
360
361
362
363
364
365
366
367
368


        std::set<std::string> all_parts;

        // find out what parts are being used in the dataset.  Store results in all_parts.
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
369
                if (source.should_load_box(data.images[i].boxes[j]))
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
                {
                    const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
                    std::map<std::string,point>::const_iterator itr;

                    for (itr = parts.begin(); itr != parts.end(); ++itr)
                    {
                        all_parts.insert(itr->first);
                    }
                }
            }
        }

        // make a mapping between part names and the integers [0, all_parts.size())
        std::map<std::string,int> parts_idx;
        for (std::set<std::string>::iterator i = all_parts.begin(); i != all_parts.end(); ++i)
        {
386
387
            parts_idx[*i] = parts_list.size();
            parts_list.push_back(*i);
388
389
        }

390
391
        std::vector<std::vector<rectangle> > ignored_rects;
        std::vector<rectangle> ignored;
392
        image_type img;
393
394
395
        std::vector<full_object_detection> object_dets;
        for (unsigned long i = 0; i < data.images.size(); ++i)
        {
396
            double min_rect_size = std::numeric_limits<double>::infinity();
397
            object_dets.clear();
398
            ignored.clear();
399
400
            for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j)
            {
401
                if (source.should_load_box(data.images[i].boxes[j]))
402
                {
403
                    if (data.images[i].boxes[j].ignore)
404
                    {
405
                        ignored.push_back(data.images[i].boxes[j].rect);
406
                    }
407
408
409
                    else
                    {
                        std::vector<point> partlist(parts_idx.size(), OBJECT_PART_NOT_PRESENT);
410

411
412
413
414
415
416
417
418
419
                        // populate partlist with all the parts present in this box.
                        const std::map<std::string,point>& parts = data.images[i].boxes[j].parts;
                        std::map<std::string,point>::const_iterator itr;
                        for (itr = parts.begin(); itr != parts.end(); ++itr)
                        {
                            partlist[parts_idx[itr->first]] = itr->second;
                        }

                        object_dets.push_back(full_object_detection(data.images[i].boxes[j].rect, partlist));
420
                        min_rect_size = std::min<double>(min_rect_size, object_dets.back().get_rect().area());
421
                    }
422
423
                }
            }
424

425
            if (!source.should_skip_empty_images() || object_dets.size() != 0)
426
427
            {
                load_image(img, data.images[i].filename);
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
                if (object_dets.size() != 0)  
                {
                    // if shrinking the image would still result in the smallest box being
                    // bigger than the box area threshold then shrink the image.
                    while(min_rect_size/2/2 > source.box_area_thresh())
                    {
                        pyramid_down<2> pyr;
                        pyr(img);
                        min_rect_size *= (1.0/2.0)*(1.0/2.0);
                        for (auto&& r : object_dets)
                        {
                            r.get_rect() = pyr.rect_down(r.get_rect());
                            for (unsigned long k = 0; k < r.num_parts(); ++k)
                                r.part(k) = pyr.point_down(r.part(k));
                        }
                        for (auto&& r : ignored)
                        {
                            r = pyr.rect_down(r);
                        }
                    }
                    while(min_rect_size*(2.0/3.0)*(2.0/3.0) > source.box_area_thresh())
                    {
                        pyramid_down<3> pyr;
                        pyr(img);
                        min_rect_size *= (2.0/3.0)*(2.0/3.0);
                        for (auto&& r : object_dets)
                        {
                            r.get_rect() = pyr.rect_down(r.get_rect());
                            for (unsigned long k = 0; k < r.num_parts(); ++k)
                                r.part(k) = pyr.point_down(r.part(k));
                        }
                        for (auto&& r : ignored)
                        {
                            r = pyr.rect_down(r);
                        }
                    }
                }
465
                images.push_back(img);
466
467
                object_locations.push_back(object_dets);
                ignored_rects.push_back(ignored);
468
            }
469
470
471
        }


472
473
474
475
476
477
        return ignored_rects;
    }

// ----------------------------------------------------------------------------------------

    template <
478
        typename array_type
479
480
        >
    std::vector<std::vector<rectangle> > load_image_dataset (
481
        array_type& images,
482
483
484
485
486
487
        std::vector<std::vector<full_object_detection> >& object_locations,
        const image_dataset_file& source 
    )
    {
        std::vector<std::string> parts_list;
        return load_image_dataset(images, object_locations, source, parts_list);
488
489
490
491
492
    }

// ----------------------------------------------------------------------------------------

    template <
493
        typename array_type 
494
        >
495
    std::vector<std::vector<rectangle> > load_image_dataset (
496
        array_type& images,
497
498
499
500
        std::vector<std::vector<full_object_detection> >& object_locations,
        const std::string& filename
    )
    {
501
502
        std::vector<std::string> parts_list;
        return load_image_dataset(images, object_locations, image_dataset_file(filename), parts_list);
503
504
505
506
    }

// ----------------------------------------------------------------------------------------

507
508
}

509
#endif // DLIB_LOAD_IMAGE_DaTASET_Hh_
510