// Copyright (C) 2011  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_OBJECT_DeTECTOR_H__
#define DLIB_OBJECT_DeTECTOR_H__

#include "object_detector_abstract.h"

#include <algorithm>
#include <utility>
#include <vector>

#include "../geometry.h"
#include "box_overlap_testing.h"
#include "full_object_detection.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <typename image_scanner_type>
    struct processed_weight_vector
    {
        /*!
            Holds one weight vector and exposes the object that is passed as the
            first argument to image_scanner_type::detect().

            This is the generic version: init() does nothing and
            get_detect_argument() simply hands back w.  An image scanner may
            overload this template so that get_detect_argument() returns some
            other, pre-built object instead.  For example, the scan_fhog_pyramid
            object uses an overload that returns a fhog_filterbank rather than a
            feature_vector_type, which avoids rebuilding the filterbank on every
            call to detect and therefore speeds up detection.
        !*/

        typedef typename image_scanner_type::feature_vector_type feature_vector_type;

        // The raw weight vector.  It must be assigned before init() is called.
        feature_vector_type w;

        processed_weight_vector() {}

        // Hook invoked after w has been assigned.  Nothing needs to be prepared
        // in the generic version, so the scanner argument is ignored.
        void init (const image_scanner_type& /*scanner*/) {}

        // Returns the first argument to image_scanner_type::detect().
        const feature_vector_type& get_detect_argument() const
        {
            return w;
        }
    };

// ----------------------------------------------------------------------------------------

    template <
50
        typename image_scanner_type
Davis King's avatar
Davis King committed
51
52
53
54
        >
    class object_detector
    {
    public:
Davis King's avatar
Davis King committed
55
56
        typedef typename image_scanner_type::feature_vector_type feature_vector_type;

Davis King's avatar
Davis King committed
57
58
59
60
61
62
63
64
65
        object_detector (
        );

        object_detector (
            const object_detector& item 
        );

        object_detector (
            const image_scanner_type& scanner_, 
66
            const test_box_overlap& overlap_tester_,
Davis King's avatar
Davis King committed
67
            const feature_vector_type& w_ 
Davis King's avatar
Davis King committed
68
69
        );

70
71
72
73
74
75
        object_detector (
            const image_scanner_type& scanner_, 
            const test_box_overlap& overlap_tester_,
            const std::vector<feature_vector_type>& w_ 
        );

76
77
78
79
        explicit object_detector (
            const std::vector<object_detector>& detectors
        );

80
81
82
        unsigned long num_detectors (
        ) const { return w.size(); }

Davis King's avatar
Davis King committed
83
        const feature_vector_type& get_w (
84
85
            unsigned long idx = 0
        ) const { return w[idx].w; }
86

87
        const test_box_overlap& get_overlap_tester (
88
89
90
91
92
        ) const;

        const image_scanner_type& get_scanner (
        ) const;

Davis King's avatar
Davis King committed
93
94
95
96
97
98
99
100
        object_detector& operator= (
            const object_detector& item 
        );

        template <
            typename image_type
            >
        std::vector<rectangle> operator() (
101
102
            const image_type& img,
            double adjust_threshold = 0
103
        );
Davis King's avatar
Davis King committed
104

105
106
107
108
109
        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
110
            std::vector<std::pair<double, rectangle> >& final_dets,
111
            double adjust_threshold = 0
112
        );
113

114
115
116
117
118
119
120
121
122
        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<std::pair<double, full_object_detection> >& final_dets,
            double adjust_threshold = 0
        );

123
124
125
126
127
128
129
130
131
        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<full_object_detection>& final_dets,
            double adjust_threshold = 0
        );

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
        struct rect_detection
        {
            double detection_confidence;
            unsigned long weight_index;
            rectangle rect;

            bool operator<(const rect_detection& item) const { return detection_confidence < item.detection_confidence; }
        };

        struct full_detection
        {
            double detection_confidence;
            unsigned long weight_index;
            full_object_detection rect;

            bool operator<(const full_detection& item) const { return detection_confidence < item.detection_confidence; }
        };

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<rect_detection>& final_dets,
            double adjust_threshold = 0
        );

        template <
            typename image_type
            >
        void operator() (
            const image_type& img,
            std::vector<full_detection>& final_dets,
            double adjust_threshold = 0
        );

168
        template <typename T>
Davis King's avatar
Davis King committed
169
        friend void serialize (
170
            const object_detector<T>& item,
Davis King's avatar
Davis King committed
171
172
173
            std::ostream& out
        );

174
        template <typename T>
Davis King's avatar
Davis King committed
175
        friend void deserialize (
176
            object_detector<T>& item,
Davis King's avatar
Davis King committed
177
178
179
180
181
            std::istream& in 
        );

    private:

182
183
184
185
186
187
188
189
        static bool compare_pair_rect (
            const std::pair<double, rectangle>& a,
            const std::pair<double, rectangle>& b
        )
        {
            return a.first < b.first;
        }

Davis King's avatar
Davis King committed
190
        bool overlaps_any_box (
191
            const std::vector<rect_detection>& rects,
192
193
194
195
196
            const dlib::rectangle& rect
        ) const
        {
            for (unsigned long i = 0; i < rects.size(); ++i)
            {
197
                if (boxes_overlap(rects[i].rect, rect))
198
199
200
201
202
                    return true;
            }
            return false;
        }

203
        test_box_overlap boxes_overlap;
204
        std::vector<processed_weight_vector<image_scanner_type> > w;
205
        image_scanner_type scanner;
Davis King's avatar
Davis King committed
206
207
208
209
    };

// ----------------------------------------------------------------------------------------

    template <typename T>
Davis King's avatar
Davis King committed
211
    void serialize (
212
        const object_detector<T>& item,
Davis King's avatar
Davis King committed
213
214
215
        std::ostream& out
    )
    {
216
        int version = 2;
217
218
        serialize(version, out);

Davis King's avatar
Davis King committed
219
220
221
222
        T scanner;
        scanner.copy_configuration(item.scanner);
        serialize(scanner, out);
        serialize(item.boxes_overlap, out);
223
224
225
226
        // serialize all the weight vectors
        serialize(item.w.size(), out);
        for (unsigned long i = 0; i < item.w.size(); ++i)
            serialize(item.w[i].w, out);
Davis King's avatar
Davis King committed
227
228
229
230
    }

// ----------------------------------------------------------------------------------------

    template <typename T>
Davis King's avatar
Davis King committed
232
    void deserialize (
233
        object_detector<T>& item,
Davis King's avatar
Davis King committed
234
235
236
        std::istream& in 
    )
    {
237
238
        int version = 0;
        deserialize(version, in);
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
        if (version == 1)
        {
            deserialize(item.scanner, in);
            item.w.resize(1);
            deserialize(item.w[0].w, in);
            item.w[0].init(item.scanner);
            deserialize(item.boxes_overlap, in);
        }
        else if (version == 2)
        {
            deserialize(item.scanner, in);
            deserialize(item.boxes_overlap, in);
            unsigned long num_detectors = 0;
            deserialize(num_detectors, in);
            item.w.resize(num_detectors);
            for (unsigned long i = 0; i < item.w.size(); ++i)
            {
                deserialize(item.w[i].w, in);
                item.w[i].init(item.scanner);
            }
        }
        else 
        {
262
            throw serialization_error("Unexpected version encountered while deserializing a dlib::object_detector object.");
263
        }
Davis King's avatar
Davis King committed
264
265
266
267
268
269
270
271
272
    }

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
//                      object_detector member functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
273
        typename image_scanner_type
Davis King's avatar
Davis King committed
274
        >
275
    object_detector<image_scanner_type>::
Davis King's avatar
Davis King committed
276
277
278
279
280
281
282
283
    object_detector (
    )
    {
    }

// ----------------------------------------------------------------------------------------

    template <
284
        typename image_scanner_type
Davis King's avatar
Davis King committed
285
        >
286
    object_detector<image_scanner_type>::
Davis King's avatar
Davis King committed
287
288
289
290
291
292
293
294
295
296
297
298
    object_detector (
        const object_detector& item 
    )
    {
        boxes_overlap = item.boxes_overlap;
        w = item.w;
        scanner.copy_configuration(item.scanner);
    }

// ----------------------------------------------------------------------------------------

    template <
299
        typename image_scanner_type
Davis King's avatar
Davis King committed
300
        >
301
    object_detector<image_scanner_type>::
Davis King's avatar
Davis King committed
302
303
    object_detector (
        const image_scanner_type& scanner_, 
304
        const test_box_overlap& overlap_tester,
Davis King's avatar
Davis King committed
305
        const feature_vector_type& w_ 
Davis King's avatar
Davis King committed
306
    ) :
307
        boxes_overlap(overlap_tester)
Davis King's avatar
Davis King committed
308
    {
309
310
        // make sure requires clause is not broken
        DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 &&
Davis King's avatar
Davis King committed
311
                    w_.size() == scanner_.get_num_dimensions() + 1, 
312
313
314
315
316
317
318
319
            "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
            << "\n\t w_.size():                     " << w_.size()
            << "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
            << "\n\t this: " << this
            );

Davis King's avatar
Davis King committed
320
        scanner.copy_configuration(scanner_);
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
        w.resize(1);
        w[0].w = w_;
        w[0].init(scanner);
    }

// ----------------------------------------------------------------------------------------

    template <
        typename image_scanner_type
        >
    object_detector<image_scanner_type>::
    object_detector (
        const image_scanner_type& scanner_, 
        const test_box_overlap& overlap_tester,
        const std::vector<feature_vector_type>& w_ 
    ) :
        boxes_overlap(overlap_tester)
    {
        // Requires clause: a usable scanner and at least one weight vector.
        DLIB_ASSERT(scanner_.get_num_detection_templates() > 0 && w_.size() > 0,
            "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
            << "\n\t Invalid inputs were given to this function "
            << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
            << "\n\t w_.size():                     " << w_.size()
            << "\n\t this: " << this
            );

#ifdef ENABLE_ASSERTS
        // Every weight vector must hold one weight per scanner dimension plus
        // one extra element.  The loop exists only for this check, so it is
        // compiled only when asserts are enabled.
        for (unsigned long k = 0; k < w_.size(); ++k)
        {
            DLIB_ASSERT(w_[k].size() == scanner_.get_num_dimensions() + 1, 
                "\t object_detector::object_detector(scanner_,overlap_tester,w_)"
                << "\n\t Invalid inputs were given to this function "
                << "\n\t scanner_.get_num_detection_templates(): " << scanner_.get_num_detection_templates()
                << "\n\t w_["<<k<<"].size():                     " << w_[k].size()
                << "\n\t scanner_.get_num_dimensions(): " << scanner_.get_num_dimensions()
                << "\n\t this: " << this
                );
        }
#endif

        // Only the configuration of scanner_ is taken over.
        scanner.copy_configuration(scanner_);

        // Store each weight vector and let it prepare itself for the scanner.
        w.resize(w_.size());
        for (unsigned long k = 0; k < w.size(); ++k)
        {
            processed_weight_vector<image_scanner_type>& slot = w[k];
            slot.w = w_[k];
            slot.init(scanner);
        }
    }

// ----------------------------------------------------------------------------------------

    template <
        typename image_scanner_type
        >
    object_detector<image_scanner_type>::
    object_detector (
        const std::vector<object_detector>& detectors
    )
    {
        // Requires clause: there must be at least one detector to merge.
        DLIB_ASSERT(detectors.size() != 0,
                "\t object_detector::object_detector(detectors)"
                << "\n\t Invalid inputs were given to this function "
                << "\n\t this: " << this
        );

        // Gather every weight vector of every input detector, in order.
        std::vector<feature_vector_type> weights;
        weights.reserve(detectors.size());
        for (unsigned long d = 0; d < detectors.size(); ++d)
        {
            const object_detector& src = detectors[d];
            const unsigned long count = src.num_detectors();
            for (unsigned long k = 0; k < count; ++k)
                weights.push_back(src.get_w(k));
        }

        // The scanner and overlap tester of the first detector are used for
        // the combined detector.
        const object_detector& first = detectors[0];
        *this = object_detector(first.get_scanner(), first.get_overlap_tester(), weights);
    }

// ----------------------------------------------------------------------------------------

    template <
400
        typename image_scanner_type
Davis King's avatar
Davis King committed
401
        >
402
    object_detector<image_scanner_type>& object_detector<image_scanner_type>::
Davis King's avatar
Davis King committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
    operator= (
        const object_detector& item 
    )
    {
        if (this == &item)
            return *this;

        boxes_overlap = item.boxes_overlap;
        w = item.w;
        scanner.copy_configuration(item.scanner);
        return *this;
    }

// ----------------------------------------------------------------------------------------

    template <
419
        typename image_scanner_type
Davis King's avatar
Davis King committed
420
421
422
423
        >
    template <
        typename image_type
        >
424
    void object_detector<image_scanner_type>::
Davis King's avatar
Davis King committed
425
    operator() (
426
        const image_type& img,
427
        std::vector<rect_detection>& final_dets,
428
        double adjust_threshold
429
    ) 
Davis King's avatar
Davis King committed
430
    {
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
        scanner.load(img);
        std::vector<std::pair<double, rectangle> > dets;
        std::vector<rect_detection> dets_accum;
        for (unsigned long i = 0; i < w.size(); ++i)
        {
            const double thresh = w[i].w(scanner.get_num_dimensions());
            scanner.detect(w[i].get_detect_argument(), dets, thresh + adjust_threshold);
            for (unsigned long j = 0; j < dets.size(); ++j)
            {
                rect_detection temp;
                temp.detection_confidence = dets[j].first-thresh;
                temp.weight_index = i;
                temp.rect = dets[j].second;
                dets_accum.push_back(temp);
            }
        }

        // Do non-max suppression
        final_dets.clear();
450
451
        if (w.size() > 1)
            std::sort(dets.rbegin(), dets.rend(), compare_pair_rect);
452
        for (unsigned long i = 0; i < dets_accum.size(); ++i)
Davis King's avatar
Davis King committed
453
        {
454
455
            if (overlaps_any_box(final_dets, dets_accum[i].rect))
                continue;
Davis King's avatar
Davis King committed
456

457
458
459
            final_dets.push_back(dets_accum[i]);
        }
    }

// ----------------------------------------------------------------------------------------

    template <
        typename image_scanner_type
        >
    template <
        typename image_type
        >
    void object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
        std::vector<full_detection>& final_dets,
        double adjust_threshold 
    )
    {
        // Run the rectangle based detector first.
        std::vector<rect_detection> rect_dets;
        (*this)(img, rect_dets, adjust_threshold);

        final_dets.resize(rect_dets.size());

        // Convert each rectangle detection into a full_detection.  The scanner
        // builds the full_object_detection from the rectangle and the raw
        // weight vector that produced it.
        for (unsigned long k = 0; k < rect_dets.size(); ++k)
        {
            const rect_detection& src = rect_dets[k];
            full_detection& dst = final_dets[k];

            dst.detection_confidence = src.detection_confidence;
            dst.weight_index = src.weight_index;
            dst.rect = scanner.get_full_object_detection(src.rect, w[src.weight_index].w);
        }
    }

// ----------------------------------------------------------------------------------------

    template <
        typename image_scanner_type
        >
    template <
        typename image_type
        >
    std::vector<rectangle> object_detector<image_scanner_type>::
    operator() (
        const image_type& img,
        double adjust_threshold
    ) 
    {
        // Run the rect_detection overload and keep only the rectangles, in the
        // same order.
        std::vector<rect_detection> rect_dets;
        (*this)(img, rect_dets, adjust_threshold);

        std::vector<rectangle> rects;
        rects.reserve(rect_dets.size());
        for (unsigned long k = 0; k < rect_dets.size(); ++k)
            rects.push_back(rect_dets[k].rect);

        return rects;
    }

// ----------------------------------------------------------------------------------------

    template <
517
        typename image_scanner_type
518
519
520
521
        >
    template <
        typename image_type
        >
522
    void object_detector<image_scanner_type>::
523
524
    operator() (
        const image_type& img,
525
526
        std::vector<std::pair<double, rectangle> >& final_dets,
        double adjust_threshold
527
    ) 
528
    {
529
530
        std::vector<rect_detection> dets;
        (*this)(img,dets,adjust_threshold);
531

532
533
534
        final_dets.resize(dets.size());
        for (unsigned long i = 0; i < dets.size(); ++i)
            final_dets[i] = std::make_pair(dets[i].detection_confidence,dets[i].rect);
535
536
    }

// ----------------------------------------------------------------------------------------

    template <
540
        typename image_scanner_type
541
542
543
544
        >
    template <
        typename image_type
        >
545
    void object_detector<image_scanner_type>::
546
547
548
549
550
551
    operator() (
        const image_type& img,
        std::vector<std::pair<double, full_object_detection> >& final_dets,
        double adjust_threshold
    ) 
    {
552
553
        std::vector<rect_detection> dets;
        (*this)(img,dets,adjust_threshold);
554
555

        final_dets.clear();
556
        final_dets.reserve(dets.size());
557
558

        // convert all the rectangle detections into full_object_detections.
559
        for (unsigned long i = 0; i < dets.size(); ++i)
560
        {
561
562
            final_dets.push_back(std::make_pair(dets[i].detection_confidence, 
                                                scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w)));
563
564
565
        }
    }

// ----------------------------------------------------------------------------------------

    template <
569
        typename image_scanner_type
570
571
572
573
        >
    template <
        typename image_type
        >
574
    void object_detector<image_scanner_type>::
575
576
577
578
579
580
    operator() (
        const image_type& img,
        std::vector<full_object_detection>& final_dets,
        double adjust_threshold
    ) 
    {
581
582
        std::vector<rect_detection> dets;
        (*this)(img,dets,adjust_threshold);
583
584

        final_dets.clear();
585
        final_dets.reserve(dets.size());
586
587

        // convert all the rectangle detections into full_object_detections.
588
        for (unsigned long i = 0; i < dets.size(); ++i)
589
        {
590
            final_dets.push_back(scanner.get_full_object_detection(dets[i].rect, w[dets[i].weight_index].w));
591
592
593
        }
    }

// ----------------------------------------------------------------------------------------

    template <
597
        typename image_scanner_type
598
        >
599
    const test_box_overlap& object_detector<image_scanner_type>::
600
601
602
603
604
605
606
607
608
    get_overlap_tester (
    ) const
    {
        return boxes_overlap;
    }

// ----------------------------------------------------------------------------------------

    template <
609
        typename image_scanner_type
610
        >
611
    const image_scanner_type& object_detector<image_scanner_type>::
612
613
614
615
616
617
    get_scanner (
    ) const
    {
        return scanner;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_OBJECT_DeTECTOR_H__