// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_DNn_CORE_H_
#define DLIB_DNn_CORE_H_

#include "core_abstract.h"
#include "tensor.h"
#include <iterator>
#include <memory>
#include <type_traits>
#include "../statistics.h"
#include "../rand.h"
#include "../algs.h"
#include <utility>


namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Tell us if T is one of the special layer types (i.e. add_layer, add_tag_layer, or
    // add_skip_layer).
    template <typename T> struct is_nonloss_layer_type : std::false_type {};
    // Tell us if T is an instance of add_loss_layer.
    template <typename T> struct is_loss_layer_type : std::false_type {};
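    // For instance (an illustrative sketch, not part of the library beyond the traits
    // declared above), these can be used to constrain templates that expect a network:
    //     static_assert(is_nonloss_layer_type<net_type>::value,
    //                   "net_type must be an add_layer, add_tag_layer, or add_skip_layer");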

// ----------------------------------------------------------------------------------------

    inline void randomize_parameters (
        tensor& params,
        unsigned long num_inputs_and_outputs,
        dlib::rand& rnd
    )
    {
        float* data = params.host();
        for (size_t i = 0; i < params.size(); ++i)
        {
            // Draw a random number to initialize the layer according to formula (16)
            // from Understanding the difficulty of training deep feedforward neural
            // networks by Xavier Glorot and Yoshua Bengio.
            float val = 2*rnd.get_random_float()-1;
            val *= std::sqrt(6.0/(num_inputs_and_outputs));

            data[i] = val;
        }
    }
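
    // A minimal usage sketch (illustrative only; the tensor and the input/output counts
    // are hypothetical).  A layer's setup() would typically size its parameter tensor and
    // then call this with the layer's fan-in plus fan-out, which yields the uniform
    // Glorot/Xavier range [-sqrt(6/(num_inputs+num_outputs)), +sqrt(6/(num_inputs+num_outputs))]:
    //
    //     params.set_size(num_inputs, num_outputs);
    //     randomize_parameters(params, num_inputs+num_outputs, rnd);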

// ----------------------------------------------------------------------------------------

    template <typename T, size_t N>
    class sstack
    {
    public:
        static_assert(N > 0, "You can't create an empty sstack.");
        typedef T value_type;
        const static size_t num_elements = N;

        sstack() {}
        sstack(const T& item_) : item(item_), data(item_) {}

        const T& top() const { return item; }
        T& top() { return item; }

        size_t size() const { return N; }

        const sstack<T,N-1>& pop() const { return data; }
        sstack<T,N-1>& pop() { return data; }

        friend void serialize(const sstack& item, std::ostream& out)
        {
            serialize(item.top(), out);
            serialize(item.pop(), out);
        }

        friend void deserialize(sstack& item, std::istream& in)
        {
            deserialize(item.top(), in);
            deserialize(item.pop(), in);
        }

    private:
        T item;
        sstack<T,N-1> data;
    };

    template <typename T>
    class sstack<T,1> // base case of recursive definition.
    {
    public:
        sstack() {}
        explicit sstack(const T& item_) : item(item_) {}

        const T& top() const { return item; }
        T& top() { return item; }

        size_t size() const { return 1; }

        friend void serialize(const sstack& item, std::ostream& out)
        {
            serialize(item.top(), out);
        }

        friend void deserialize(sstack& item, std::istream& in)
        {
            deserialize(item.top(), in);
        }

    private:
        T item;
    };
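
    // A short usage sketch (illustrative only).  An sstack holds exactly N items and is
    // consumed one element at a time via pop(), which is how per-layer solver objects are
    // handed down through a network during update():
    //
    //     sstack<int,3> s(42);   // all three elements initialized to 42
    //     s.top() = 1;           // element belonging to the current layer
    //     s.pop().top() = 2;     // element belonging to the next layer down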

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    namespace dimpl
    {
        template <typename T, typename enabled=void>
        class subnet_wrapper
        {
            /*!
                WHAT THIS OBJECT REPRESENTS
                    This is a tool that makes an add_layer or add_loss_layer object
                    expose only the part of its interface defined by the SUBNET
                    type in layers_abstract.h.  This way, when we pass subnetwork
                    objects to the layer callbacks those callbacks won't be able to 
                    interact with the subnetworks in a way other than specified 
                    by the SUBNET interface spec.
            !*/

        public:
            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            subnet_wrapper(T& l_) {}
            // Nothing here because in this case T is one of the input layer types 
            // that doesn't have anything in it.
        };

        template <typename T>
        class subnet_wrapper<T,typename std::enable_if<is_nonloss_layer_type<T>::value>::type>
        {

        public:
            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            typedef T wrapped_type;
            const static size_t num_layers = T::num_layers;

            subnet_wrapper(T& l_) : l(l_),subnetwork(l.subnet()) {}

            const tensor& get_output() const { return l.get_output(); }
            tensor& get_gradient_input() { return l.get_gradient_input(); }

            const subnet_wrapper<typename T::subnet_type>& subnet() const { return subnetwork; }
            subnet_wrapper<typename T::subnet_type>& subnet() { return subnetwork; }

        private:
            T& l;
            subnet_wrapper<typename T::subnet_type> subnetwork;
        };
    }

// ----------------------------------------------------------------------------------------

    template <typename LAYER_DETAILS, typename SUBNET, typename enabled = void>
    class add_layer;

    template <typename T, typename U>
    struct is_nonloss_layer_type<add_layer<T,U>> : std::true_type {};

    template <typename LAYER_DETAILS, typename SUBNET>
    class add_layer<LAYER_DETAILS,SUBNET, 
            typename std::enable_if<is_nonloss_layer_type<SUBNET>::value>::type>
    {
    public:
        typedef LAYER_DETAILS layer_details_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = subnet_type::num_layers + 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;

        add_layer(
        ):
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {
        }

        add_layer(const add_layer&) = default;
        add_layer& operator=(const add_layer&) = default;
        add_layer(add_layer&& item) : add_layer() { swap(item); }
        add_layer& operator=(add_layer&& item) { swap(item); return *this; }

        template <typename T, typename U, typename E>
        friend class add_layer;

        // Allow copying networks from one to another as long as their corresponding 
        // layers can be constructed from each other.
        template <typename T, typename U, typename E>
        add_layer(
            const add_layer<T,U,E>& item
        ) :
            subnetwork(item.subnet()),
            details(item.layer_details()), 
            this_layer_setup_called(item.this_layer_setup_called),
            gradient_input_is_stale(item.gradient_input_is_stale),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
        }

        template <typename ...T>
        add_layer(
            const LAYER_DETAILS& layer_det, 
            T&& ...args
        ) : 
            details(layer_det), 
            subnetwork(std::forward<T>(args)...),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {
        }

        template <typename ...T>
        add_layer(
            LAYER_DETAILS&& layer_det, 
            T&& ...args
        ) : 
            details(std::move(layer_det)), 
            subnetwork(std::forward<T>(args)...),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return forward(temp_tensor);
        }


        const tensor& operator() (const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        const tensor& forward(const tensor& x)
        {
            subnetwork.forward(x);
            const dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            if (!this_layer_setup_called)
            {
                details.setup(wsub);
                this_layer_setup_called = true;
            }
            details.forward(wsub, cached_output);
            gradient_input_is_stale = true;
            return get_output();
        }

        const tensor& get_output() const { return cached_output; }
        tensor& get_gradient_input() 
        { 
            if (gradient_input_is_stale)
            {
                gradient_input_is_stale = false;
                x_grad.copy_size(get_output());
                x_grad = 0;
            }
            return x_grad; 
        }

        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            params_grad.copy_size(details.get_layer_params());
            params_grad = 0;
            details.backward(get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
            // Don't try to adjust the parameters if this layer doesn't have any.
            if (params_grad.size() != 0)
                solvers.top()(details, static_cast<const tensor&>(params_grad));
            subnetwork.update(x, solvers.pop());
        }

        const subnet_type& subnet() const { return subnetwork; }
        subnet_type& subnet() { return subnetwork; }

        const layer_details_type& layer_details() const { return details; } 
        layer_details_type& layer_details() { return details; } 

        void clean()
        {
            x_grad.clear();
            cached_output.clear();
            params_grad.clear();
            temp_tensor.clear();
            gradient_input_is_stale = true;
            subnetwork.clean();
        }

        friend void serialize(const add_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.subnetwork, out);
            serialize(item.details, out);
            serialize(item.this_layer_setup_called, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }

        friend void deserialize(add_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_layer.");
            deserialize(item.subnetwork, in);
            deserialize(item.details, in);
            deserialize(item.this_layer_setup_called, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

    private:

        void swap(add_layer& item)
        {
            std::swap(subnetwork,item.subnetwork);
            std::swap(details, item.details);
            std::swap(this_layer_setup_called, item.this_layer_setup_called);
            std::swap(gradient_input_is_stale, item.gradient_input_is_stale);
            std::swap(x_grad, item.x_grad);
            std::swap(cached_output, item.cached_output);
        }


        subnet_type subnetwork;
        LAYER_DETAILS details;
        bool this_layer_setup_called;
        bool gradient_input_is_stale;
        resizable_tensor x_grad;
        resizable_tensor cached_output; 

        // The following 2 objects don't logically contribute to the state of this class.
        // They are only here to prevent them from being reallocated over and over in
        // member functions.
        resizable_tensor params_grad; 
        resizable_tensor temp_tensor;

    };

// ----------------------------------------------------------------------------------------

// This version of add_layer handles the special case where the subnetwork being given is
// just an input layer object.
    template <typename LAYER_DETAILS, typename INPUT_LAYER, typename enabled>
    class add_layer
    {
    public:
        typedef LAYER_DETAILS layer_details_type;
        typedef INPUT_LAYER subnet_type;
        typedef typename INPUT_LAYER::input_type input_type;
        const static unsigned int sample_expansion_factor = INPUT_LAYER::sample_expansion_factor;
        const static size_t num_layers = 1;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_layer(
        ): 
            this_layer_setup_called(false),
            gradient_input_is_stale(true) 
        {}

        add_layer(const add_layer&) = default;
        add_layer(add_layer&& item) : add_layer() { swap(item); }
        add_layer& operator=(const add_layer&) = default;
        add_layer& operator=(add_layer&& item) { swap(item); return *this; }

        template <typename T, typename U, typename E>
        friend class add_layer;

        // Allow copying networks from one to another as long as their corresponding 
        // layers can be constructed from each other.
        template <typename T, typename U, typename E>
        add_layer(
            const add_layer<T,U,E>& item
        ):
            input_layer(item.subnet()),
            details(item.layer_details()),
            this_layer_setup_called(item.this_layer_setup_called),
            gradient_input_is_stale(item.gradient_input_is_stale),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
        }

        add_layer(
            const LAYER_DETAILS& layer_det
        ) : 
            details(layer_det), 
            this_layer_setup_called(false),
            gradient_input_is_stale(true) 
        {}

        add_layer(
            LAYER_DETAILS&& layer_det
        ) : 
            details(std::move(layer_det)), 
            this_layer_setup_called(false),
            gradient_input_is_stale(true) 
        {}

        add_layer(
            LAYER_DETAILS layer_det, 
            INPUT_LAYER il
        ) : 
            details(std::move(layer_det)),
            input_layer(std::move(il)),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {}

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            input_layer.to_tensor(ibegin, iend, data);
            // make sure the input layer's to_tensor() function is implemented properly.
            DLIB_CASSERT(std::distance(ibegin,iend)*sample_expansion_factor == data.num_samples(),"");
            data.async_copy_to_device();
        }


        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return forward(temp_tensor);
        }


        const tensor& operator() (const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        const tensor& forward (const tensor& x)
        {
            DLIB_CASSERT(x.num_samples()%sample_expansion_factor == 0,"");
            subnet_wrapper wsub(x, grad_final_ignored);
            if (!this_layer_setup_called)
            {
                details.setup(wsub);
                this_layer_setup_called = true;
            }
            details.forward(wsub, cached_output);
            gradient_input_is_stale = true;
            return get_output();
        }

        const tensor& get_output() const { return cached_output; }
        tensor& get_gradient_input() 
        { 
            if (gradient_input_is_stale)
            {
                gradient_input_is_stale = false;
                x_grad.copy_size(get_output());
                x_grad = 0;
            }
            return x_grad; 
        }


        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            subnet_wrapper wsub(x, grad_final_ignored);
            params_grad.copy_size(details.get_layer_params());
            params_grad = 0;
            details.backward(get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
            // Don't try to adjust the parameters if this layer doesn't have any.
            if (params_grad.size() != 0)
                solvers.top()(details, static_cast<const tensor&>(params_grad));
        }

        const subnet_type& subnet() const { return input_layer; } 
        subnet_type& subnet() { return input_layer; } 

        const layer_details_type& layer_details() const { return details; } 
        layer_details_type& layer_details() { return details; } 

        void clean()
        {
            x_grad.clear();
            grad_final_ignored.clear();
            cached_output.clear();
            params_grad.clear();
            temp_tensor.clear();
            gradient_input_is_stale = true;
        }

        friend void serialize(const add_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.input_layer, out);
            serialize(item.details, out);
            serialize(item.this_layer_setup_called, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }

        friend void deserialize(add_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_layer.");
            deserialize(item.input_layer, in);
            deserialize(item.details, in);
            deserialize(item.this_layer_setup_called, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

    private:

        class subnet_wrapper
        {
        public:
            subnet_wrapper(const tensor& x_, resizable_tensor& grad_final_ignored_) :
                x(x_), grad_final_ignored(grad_final_ignored_) {}

            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            const tensor& get_output() const { return x; }
            tensor& get_gradient_input() 
            { 
                // It doesn't matter what values are in this tensor but client code will
                // always assume it's the same dimension as the output so make sure that is
                // the case.  Note that we do set it to a non-crazy value though to avoid
                // it being full of NaN and slowing the processing down.
                if (!have_same_dimensions(x, grad_final_ignored))
                {
                    grad_final_ignored.copy_size(x);
                    grad_final_ignored = 0;  
                }
                return grad_final_ignored; 
            }

        private:
            const tensor& x;
            resizable_tensor& grad_final_ignored;
        };

        void swap(add_layer& item)
        {
            std::swap(input_layer, item.input_layer);
            std::swap(details, item.details);
            std::swap(this_layer_setup_called, item.this_layer_setup_called);
            std::swap(gradient_input_is_stale, item.gradient_input_is_stale);
            std::swap(x_grad, item.x_grad); 
            std::swap(cached_output, item.cached_output); 
        }

        subnet_type input_layer;
        LAYER_DETAILS details;
        bool this_layer_setup_called;
        bool gradient_input_is_stale;
        resizable_tensor x_grad; 
        resizable_tensor cached_output; 

        // The following 3 objects don't logically contribute to the state of this class.
        // They are only here to prevent them from being reallocated over and over in
        // member functions.
        resizable_tensor params_grad; 
        resizable_tensor temp_tensor; 
        resizable_tensor grad_final_ignored;
    };

// ----------------------------------------------------------------------------------------

    template <unsigned long ID, typename SUBNET, typename enabled=void>
    class add_tag_layer;

    template <unsigned long ID, typename SUBNET>
    class add_tag_layer<ID,SUBNET,
            typename std::enable_if<is_nonloss_layer_type<SUBNET>::value>::type>
    {
    public:
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = subnet_type::num_layers + 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_tag_layer() = default;
        add_tag_layer(const add_tag_layer&) = default;
        add_tag_layer(add_tag_layer&&) = default;
        add_tag_layer& operator=(add_tag_layer&&) = default;
        add_tag_layer& operator=(const add_tag_layer&) = default;

        template <typename T>
        add_tag_layer(
            const add_tag_layer<ID,T>& item
        ) : subnetwork(item.subnet())
        {}

        template <typename ...T>
        add_tag_layer(
            T ...args
        ) : 
            subnetwork(std::move(args)...) 
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            return subnetwork(ibegin,iend);
        }

        const tensor& operator() (const input_type& x)
        {
            return subnetwork(x);
        }

        const tensor& forward(const tensor& x)
        {
            return subnetwork.forward(x);
        }

        const tensor& get_output() const { return subnetwork.get_output(); }

        tensor& get_gradient_input() 
        { 
            return subnetwork.get_gradient_input();
        }

        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            subnetwork.update(x,solvers.pop());
        }

        const subnet_type& subnet() const { return subnetwork; }
        subnet_type& subnet() { return subnetwork; }

        void clean()
        {
            subnetwork.clean();
        }

        friend void serialize(const add_tag_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.subnetwork, out);
        }

        friend void deserialize(add_tag_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_tag_layer.");
            deserialize(item.subnetwork, in);
        }

    private:

        subnet_type subnetwork;
    };

// ----------------------------------------------------------------------------------------

// This version of add_tag_layer handles the special case where the subnetwork being given
// is just an input layer object.
    template <unsigned long ID, typename INPUT_LAYER, typename enabled>
    class add_tag_layer
    {
    public:
        typedef INPUT_LAYER subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_tag_layer() = default;
        add_tag_layer(const add_tag_layer&) = default;
        add_tag_layer& operator=(const add_tag_layer&) = default;
        add_tag_layer(add_tag_layer&& item) : add_tag_layer() { swap(item); }
        add_tag_layer& operator=(add_tag_layer&& item) { swap(item); return *this; }

        template <typename T, typename E>
        add_tag_layer(
            const add_tag_layer<ID,T,E>& item
        ) : input_layer(item.subnet())
        {}

        template <typename ...T>
        add_tag_layer(
            T ...args
        ) : 
            input_layer(std::move(args)...) 
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            input_layer.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin, 
            input_iterator iend
        )
        {
            input_layer.to_tensor(ibegin,iend,cached_output);
            return get_output();
        }

        const tensor& operator() (const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        const tensor& forward(const tensor& x)
        {
            cached_output = x;
            return get_output();
        }

        const tensor& get_output() const 
        { 
            return cached_output; 
        }

        tensor& get_gradient_input() 
        { 
            if (!have_same_dimensions(cached_output, grad_final_ignored))
            {
                grad_final_ignored.copy_size(get_output());
                grad_final_ignored = 0;
            }
            return grad_final_ignored; 
        }

        template <typename solver_type>
        void update(const tensor& /*x*/, sstack<solver_type,num_layers>& /*solvers*/)
        {
            // nothing to update
        }

        const subnet_type& subnet() const { return input_layer; }
        subnet_type& subnet() { return input_layer; }

        void clean()
        {
            grad_final_ignored.clear();
            cached_output.clear();
        }

        friend void serialize(const add_tag_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.input_layer, out);
            serialize(item.cached_output, out);
            serialize(item.grad_final_ignored, out);
        }

        friend void deserialize(add_tag_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_tag_layer.");
            deserialize(item.input_layer, in);
            deserialize(item.cached_output, in);
            deserialize(item.grad_final_ignored, in);
        }

    private:

        void swap(add_tag_layer& item)
        {
            std::swap(input_layer, item.input_layer);
            std::swap(cached_output, item.cached_output);
            std::swap(grad_final_ignored, item.grad_final_ignored);
        }

        subnet_type input_layer;
        resizable_tensor cached_output;
        resizable_tensor grad_final_ignored;
    };

    template <unsigned long ID, typename U, typename E>
    struct is_nonloss_layer_type<add_tag_layer<ID,U,E>> : std::true_type {};


// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <typename LOSS_DETAILS, typename SUBNET>
    class add_loss_layer;

    class no_label_type
    {
    private:
        // We don't want anyone making these no_label_type objects.  They are here only to
        // allow add_loss_layer::label_type and dnn_trainer::label_type to exist which avoids
        // needing to overload add_loss_layer and dnn_trainer for supervised and unsupervised
        // losses.  It also can be a type to use in template metaprogramming to indicate
        // "no label".  So here we make the constructor private with the exception that
        // add_loss_layer objects can make it (again, just to simplify add_loss_layer's
        // implementation).
        no_label_type()=default;
        template <typename LOSS_DETAILS, typename SUBNET> friend class add_loss_layer;
    };

// ----------------------------------------------------------------------------------------

    template <typename LOSS_DETAILS, typename SUBNET>
    class add_loss_layer
    {
        template <typename T, typename enabled=void>
        struct get_loss_layer_label_type
        {
            typedef no_label_type type;
        };
        template <typename T>
        struct get_loss_layer_label_type<T,typename std::enable_if<sizeof(typename T::label_type)!=0>::type>
        {
            typedef typename T::label_type type;
        };

    public:
        typedef LOSS_DETAILS loss_details_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        // Note that the loss layer doesn't count as an additional layer.
        const static size_t num_layers = subnet_type::num_layers;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        typedef typename get_loss_layer_label_type<LOSS_DETAILS>::type label_type;

        static_assert(is_nonloss_layer_type<SUBNET>::value, 
            "SUBNET must be of type add_layer, add_skip_layer, or add_tag_layer."); 
        static_assert(sample_expansion_factor == LOSS_DETAILS::sample_expansion_factor,
            "The loss layer and input layer must agree on the sample_expansion_factor.");


        add_loss_layer() {};
        add_loss_layer(const add_loss_layer&) = default;
        add_loss_layer& operator=(const add_loss_layer&) = default;
        add_loss_layer(add_loss_layer&& item) : add_loss_layer() { swap(item); }
        add_loss_layer& operator=(add_loss_layer&& item) { swap(item); return *this; }

        template <typename T, typename U>
        add_loss_layer(
            const add_loss_layer<T,U>& item
        ) : 
            loss(item.loss_details()),
            subnetwork(item.subnet())
        {}

        template <typename ...T>
        add_loss_layer(
            const LOSS_DETAILS& layer_det, 
            T&& ...args
        ) : 
            loss(layer_det), 
            subnetwork(std::forward<T>(args)...)
        {
        }

        template <typename ...T>
        add_loss_layer(
            LOSS_DETAILS&& layer_det, 
            T&& ...args
        ) : 
            loss(std::move(layer_det)), 
            subnetwork(std::forward<T>(args)...)
        {
        }

        template <typename ...T>
        add_loss_layer(
            T ...args
        ) : 
            subnetwork(std::move(args)...)
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename output_iterator>
        void operator() (
            const tensor& x, 
            output_iterator obegin
        )
        {
            subnetwork.forward(x);
            const dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            loss.to_label(wsub, obegin);
        }

        template <typename input_iterator, typename output_iterator>
        void operator() (
            input_iterator ibegin,
            input_iterator iend,
            output_iterator obegin
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            (*this)(temp_tensor, obegin);
        }

        const label_type& operator() (const input_type& x)
        {
            (*this)(&x, &x+1, &temp_label);
            return temp_label;
        }

        template <typename label_iterator>
        double compute_loss (
            const tensor& x,
            label_iterator lbegin 
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            return loss.compute_loss(x, lbegin, wsub);
        }

        template <typename input_iterator, typename label_iterator>
        double compute_loss (
            input_iterator ibegin,
            input_iterator iend,
            label_iterator lbegin 
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return compute_loss(temp_tensor, lbegin);
        }

        double compute_loss (
            const tensor& x
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            return loss.compute_loss(x, wsub);
        }

        template <typename input_iterator>
        double compute_loss (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return compute_loss(temp_tensor);
        }

        template <typename label_iterator, typename solver_type>
        double update (
            const tensor& x,
            label_iterator lbegin,
            sstack<solver_type,num_layers>& solvers
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            double l = loss.compute_loss(x, lbegin, wsub);
            subnetwork.update(x, solvers);
            return l;
        }

        template <typename input_iterator, typename label_iterator, typename solver_type>
        double update (
            input_iterator ibegin,
            input_iterator iend,
            label_iterator lbegin,
            sstack<solver_type,num_layers>& solvers
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return update(temp_tensor, lbegin, solvers);
        }

        template <typename solver_type>
        double update (
            const tensor& x,
            sstack<solver_type,num_layers>& solvers
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            double l = loss.compute_loss(x, wsub);
            subnetwork.update(x, solvers);
            return l;
        }

        template <typename input_iterator, typename solver_type>
        double update (
            input_iterator ibegin,
            input_iterator iend,
            sstack<solver_type,num_layers>& solvers
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return update(temp_tensor, solvers);
        }

        const subnet_type& subnet() const { return subnetwork; }
        subnet_type& subnet() { return subnetwork; }
        const loss_details_type& loss_details() const { return loss; }
        loss_details_type& loss_details() { return loss; }

        void clean (
        )
        {
            temp_tensor.clear();
            subnetwork.clean();
        }

        friend void serialize(const add_loss_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.loss, out);
            serialize(item.subnetwork, out);
        }

        friend void deserialize(add_loss_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_loss_layer.");
            deserialize(item.loss, in);
            deserialize(item.subnetwork, in);
        }

    private:

        void swap(add_loss_layer& item)
        {
            std::swap(loss, item.loss);
            std::swap(subnetwork, item.subnetwork);
        }

        loss_details_type loss;
        subnet_type subnetwork;

        // These two objects don't logically contribute to the state of this object.  They
        // are here to prevent them from being reallocated over and over.
        label_type temp_label;
        resizable_tensor temp_tensor;
    };


    template <typename T, typename U>
    struct is_loss_layer_type<add_loss_layer<T,U>> : std::true_type {};

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    namespace impl
    {
        template <unsigned int i, typename T>
        struct layer_helper
        {
            static T& makeT();
            using next_type = typename std::remove_reference<decltype(makeT().subnet())>::type;
            using type = typename layer_helper<i-1,next_type>::type;
            static type& layer(T& n)
            {
                return layer_helper<i-1,next_type>::layer(n.subnet());
            }
        };
        template <typename T>
        struct layer_helper<0,T>
        {
            using type = T;
            static type& layer(T& n)
            {
                return n;
            }
        };

        template <template<typename> class Match, typename T, unsigned int i, typename enabled = void>
        struct layer_helper_match
        {
            static T& makeT();
            using next_type = typename std::remove_reference<decltype(makeT().subnet())>::type;
            using type = typename layer_helper_match<Match,next_type,i>::type;
            static type& layer(T& n)
            {
                return layer_helper_match<Match,next_type,i>::layer(n.subnet());
            }
        };
        // This overload catches add_layer and add_loss_layer templates.
        template <template<typename> class Match, typename T, unsigned int i>
        struct layer_helper_match<Match,T,i,
            typename std::enable_if<std::is_same<const T,const  Match<typename T::subnet_type>>::value>::type>
        {
            using type = typename layer_helper<i,T>::type;
            static type& layer(T& n)
            {
                return layer_helper<i,T>::layer(n);
            }
        };
        // This overload catches input templates.
        template <template<typename> class Match, typename T, unsigned int i>
        struct layer_helper_match<Match,T,i,
            typename std::enable_if<std::is_same<const T,const  Match<typename T::input_type>>::value>::type>
        {
            using type = typename layer_helper<i,T>::type;
            static type& layer(T& n)
            {
                return layer_helper<i,T>::layer(n);
            }
        };
        // This overload catches subnet_wrapper templates.
        template <template<typename> class Match, typename T, unsigned int i>
        struct layer_helper_match<Match,T,i,
            typename std::enable_if<std::is_same<const typename T::wrapped_type, 
                                                 const Match<typename T::wrapped_type::subnet_type>>::value>::type>
        {
            using type = typename layer_helper<i,T>::type;
            static type& layer(T& n)
            {
                return layer_helper<i,T>::layer(n);
            }
        };
    }

    template <unsigned int i, typename T>
    typename impl::layer_helper<i,T>::type& layer (T& n) 
    {
        return impl::layer_helper<i,T>::layer(n);
    }

    template <template<typename> class Match, typename T>
    typename impl::layer_helper_match<Match,T,0>::type& layer (T& n) 
    {
        return impl::layer_helper_match<Match,T,0>::layer(n);
    }

    template <template<typename> class Match, unsigned int i, typename T>
    typename impl::layer_helper_match<Match,T,i>::type& layer (T& n) 
    {
        return impl::layer_helper_match<Match,T,i>::layer(n);
    }
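
    // A short usage sketch (illustrative only; net is assumed to be a network object built
    // from the templates in this file):
    //
    //     layer<2>(net)       // the sub-network rooted 2 layers below net
    //     layer<tag1>(net)    // the sub-network rooted at the tag1 layer
    //     layer<tag1,3>(net)  // the layer found 3 links below the tag1 layer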

// ----------------------------------------------------------------------------------------

    template <template<typename> class TAG_TYPE, typename SUBNET>
    class add_skip_layer
    {
    public:
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = subnet_type::num_layers + 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_skip_layer() = default;
        add_skip_layer(const add_skip_layer&) = default;
        add_skip_layer(add_skip_layer&&) = default;
        add_skip_layer& operator=(add_skip_layer&&) = default;
        add_skip_layer& operator=(const add_skip_layer&) = default;

        template <typename T>
        add_skip_layer(
            const add_skip_layer<TAG_TYPE,T>& item
        ) : subnetwork(item.subnet())
        {}

        template <typename ...T>
        add_skip_layer(
            T ...args
        ) : 
            subnetwork(std::move(args)...) 
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            subnetwork(ibegin,iend);
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        const tensor& operator() (const input_type& x)
        {
            subnetwork(x);
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        const tensor& forward(const tensor& x)
        {
            subnetwork.forward(x);
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        const tensor& get_output() const 
        { 
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        tensor& get_gradient_input() 
        { 
            return layer<TAG_TYPE>(subnetwork).get_gradient_input();
        }

        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            subnetwork.update(x,solvers.pop());
        }

        const subnet_type& subnet() const 
        { 
            return subnetwork; 
        }

        subnet_type& subnet() 
        { 
            return subnetwork; 
        }

        void clean()
        {
            subnetwork.clean();
        }

        friend void serialize(const add_skip_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.subnetwork, out);
        }

        friend void deserialize(add_skip_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_skip_layer.");
            deserialize(item.subnetwork, in);
        }

    private:

        subnet_type subnetwork;
    };
    template <template<typename> class T, typename U>
    struct is_nonloss_layer_type<add_skip_layer<T,U>> : std::true_type {};

    template <typename SUBNET> using tag1  = add_tag_layer< 1, SUBNET>;
    template <typename SUBNET> using tag2  = add_tag_layer< 2, SUBNET>;
    template <typename SUBNET> using tag3  = add_tag_layer< 3, SUBNET>;
    template <typename SUBNET> using tag4  = add_tag_layer< 4, SUBNET>;
    template <typename SUBNET> using tag5  = add_tag_layer< 5, SUBNET>;
    template <typename SUBNET> using tag6  = add_tag_layer< 6, SUBNET>;
    template <typename SUBNET> using tag7  = add_tag_layer< 7, SUBNET>;
    template <typename SUBNET> using tag8  = add_tag_layer< 8, SUBNET>;
    template <typename SUBNET> using tag9  = add_tag_layer< 9, SUBNET>;
    template <typename SUBNET> using tag10 = add_tag_layer<10, SUBNET>;

    template <typename SUBNET> using skip1  = add_skip_layer< tag1, SUBNET>;
    template <typename SUBNET> using skip2  = add_skip_layer< tag2, SUBNET>;
    template <typename SUBNET> using skip3  = add_skip_layer< tag3, SUBNET>;
    template <typename SUBNET> using skip4  = add_skip_layer< tag4, SUBNET>;
    template <typename SUBNET> using skip5  = add_skip_layer< tag5, SUBNET>;
    template <typename SUBNET> using skip6  = add_skip_layer< tag6, SUBNET>;
    template <typename SUBNET> using skip7  = add_skip_layer< tag7, SUBNET>;
    template <typename SUBNET> using skip8  = add_skip_layer< tag8, SUBNET>;
    template <typename SUBNET> using skip9  = add_skip_layer< tag9, SUBNET>;
    template <typename SUBNET> using skip10 = add_skip_layer<tag10, SUBNET>;
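
    // A small composition sketch (illustrative only; layerA, layerB, and my_input are
    // hypothetical layer templates, not defined in this file).  tagN marks a point in a
    // network, and skipN forwards the output of the matching tagN layer, bypassing
    // everything in between:
    //
    //     using net_type = layerB<skip1<layerA<tag1<my_input>>>>;
    //
    // Here layerB sees the output of the tag1 layer directly, skipping over layerA.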

// ----------------------------------------------------------------------------------------

    namespace timpl
    {
        inline void fill_with_gaussian_random_numbers (
            tensor& t,
            dlib::rand& rnd,
            double sigma = 1
        )
        {
            float* data = t.host();
            for (size_t i = 0; i < t.size(); ++i)
                data[i] = rnd.get_random_gaussian()*sigma;
        }

        class test_layer_subnet 
        {
        public:
            test_layer_subnet (
                dlib::rand& rnd_
            ) : rnd(rnd_) 
            {
                // Output and gradient_input have to have the same dimensions in each
                // layer.
                const long num_samples = rnd.get_random_32bit_number()%4+3;
                const long nr = rnd.get_random_32bit_number()%4+2;
                const long nc = rnd.get_random_32bit_number()%4+2;
                const long k  = rnd.get_random_32bit_number()%4+2;

                output.set_size(num_samples, nr, nc, k);
                gradient_input.set_size(num_samples, nr, nc, k);

                // Use a non-zero initial gradient to make sure the layers add to it
                // rather than assign and blow away the initial value.
                fill_with_gaussian_random_numbers(gradient_input, rnd, 0.01);

                fill_with_gaussian_random_numbers(output, rnd);
            }


            const tensor& get_output() const { return output; }
            const test_layer_subnet& subnet() const { init_sub(); return *subnetwork; }

            tensor& get_gradient_input() { return gradient_input; }
            test_layer_subnet& subnet() { init_sub(); return *subnetwork; }



            unsigned long count_outputs() const
            {
                if (subnetwork)
                    return subnetwork->count_outputs() + output.size();
                else
                    return output.size();
            }

            float& get_output_element(unsigned long i)
            {
                if (i < output.size())
                    return output.host()[i];
                else
                    return subnet().get_output_element(i-output.size());
            }

            float get_gradient_input_element(unsigned long i) const
            {
                if (i < gradient_input.size())
                    return gradient_input.host()[i];
                else
                    return subnet().get_gradient_input_element(i-gradient_input.size());
            }


        private:
            // We lazily initialize sub-layers as needed when someone tries to call
            // subnet()
            void init_sub() const
            {
                if (!subnetwork)
                    subnetwork.reset(new test_layer_subnet(rnd));
            }

            dlib::rand& rnd;
            mutable std::unique_ptr<test_layer_subnet> subnetwork;
            resizable_tensor output;
            resizable_tensor gradient_input;
        };


        inline void print_tensor(
            const tensor& a
        )
        {
            auto data = a.host();
            for (size_t i = 0; i < a.size(); ++i)
                std::cout << data[i] << " ";
            std::cout << std::endl;
        }
    }

    template <
        typename layer_details_type
        >
    void test_layer (
        layer_details_type l
    )
    {
        const float base_eps = 0.01;
        using namespace timpl;
        // Do some setup
        dlib::rand rnd;
        test_layer_subnet subnetwork(rnd);
        resizable_tensor output, out2, out3;
        // Run setup() and forward() as well to make sure any calls to subnet() have
        // happened before we start assuming we know how many data elements there are
        // (since we do a lazy layer creation thing based on calls to subnet() inside
        // test_layer_subnet).
        l.setup(subnetwork);
        l.forward(subnetwork, output);

        resizable_tensor input_grad;
        input_grad.copy_size(output);
        std::cout << "output.num_samples(): "<< output.num_samples() << std::endl;
        fill_with_gaussian_random_numbers(input_grad, rnd);

        // The f() we are computing gradients of is this thing.  Its value at the current
        // parameter and data values is:
        std::cout << "f(data,params): " << dot(output, input_grad) << std::endl;

        // We are going to save a copy of the subnetwork.get_gradient_input() data before we do
        // backpropagation since the backward() function is supposed to *add* to the
        // gradients rather than overwrite them.  We will use this saved data to check if
        // that is the case.
        const unsigned long num_data_inputs = subnetwork.count_outputs();
        std::vector<float> initial_gradient_input(num_data_inputs);
        for (unsigned long i = 0; i < num_data_inputs; ++i)
            initial_gradient_input[i] = subnetwork.get_gradient_input_element(i);


        // Now tell the layer to compute all the gradients.  In the rest of this function
        // we will just be checking that these gradients were computed correctly by
        // comparing them to a central differences approximation.
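        // Concretely, for each parameter p we perturb it by +/-eps, rerun forward(), and
        // use the central difference (f(p+eps) - f(p-eps)) / (2*eps) as the reference
        // value for df/dp.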
        resizable_tensor params_grad, random_noise;
        params_grad.copy_size(l.get_layer_params());
        random_noise.copy_size(l.get_layer_params());
        randomize_parameters(random_noise, 5, rnd);
        params_grad = random_noise;
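        // Note that params_grad is seeded with random noise rather than zeros so we can
        // also verify that backward() *adds* to the parameter gradient instead of
        // overwriting it; below we subtract random_noise back out to recover what
        // backward() actually contributed.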
        l.backward(input_grad, subnetwork, params_grad);

        running_stats<double> rs_param, rs_data;

        // ==================================================================
        // first validate the way the parameter gradients are computed
        for (unsigned long i = 0; i < params_grad.size(); ++i)
        {
            layer_details_type l1(l);

            float eps = l1.get_layer_params().host()[i]*base_eps;
            if (eps == 0)
                eps = base_eps;
            const float oldval = l1.get_layer_params().host()[i];
            l1.get_layer_params().host()[i] = oldval+eps;
            l1.forward(subnetwork, out2);
            l1.get_layer_params().host()[i] = oldval-eps;
            l1.forward(subnetwork, out3);

            // Compute a reference derivative via a central differences approximation and
            // compare it to the one output by the layer and make sure they match.
            double reference_derivative = (dot(out2,input_grad)-dot(out3, input_grad))/(2*eps);
            double output_derivative = params_grad.host()[i]-random_noise.host()[i];
            double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
            if (std::abs(relative_error) > 0.01)
            {
                using namespace std;
                cout << "PARAM ERROR: "<< relative_error << endl;
                cout << "   reference_derivative:   " << reference_derivative << endl;
                cout << "   output_derivative: " << output_derivative << endl;
            }

            rs_param.add(std::abs(relative_error));
        }

        // ==================================================================
        // now validate the data gradients
        for (unsigned long i = 0; i < num_data_inputs; ++i)
        {
            const float oldval = subnetwork.get_output_element(i);
            float eps = oldval*base_eps;
            if (eps == 0)
                eps = base_eps;
            subnetwork.get_output_element(i) = oldval+eps;
            l.forward(subnetwork, out2);
            subnetwork.get_output_element(i) = oldval-eps;
            l.forward(subnetwork, out3);

            // Compute a reference derivative via a central differences approximation and
            // compare it to the one output by the layer and make sure they match.
            double reference_derivative = (dot(out2,input_grad)-dot(out3, input_grad))/(2*eps);
            double output_derivative = subnetwork.get_gradient_input_element(i)-initial_gradient_input[i];
            double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
            if (std::abs(relative_error) > 0.01)
            {
                using namespace std;
                cout << "DATA ERROR: "<< relative_error << endl;
                cout << "   reference_derivative:   " << reference_derivative << endl;
                cout << "   output_derivative: " << output_derivative << endl;
            }
            rs_data.add(std::abs(relative_error));
        }

        using namespace std;
        if (rs_param.current_n() > 1)
        {
            cout << "rs_param.mean():   " << rs_param.mean() << endl;
            cout << "rs_param.stddev(): " << rs_param.stddev() << endl;
            cout << "rs_param.max():    " << rs_param.max() << endl;
        }
        if (rs_data.current_n() > 1)
        {
            cout << "rs_data.mean():    " << rs_data.mean() << endl;
            cout << "rs_data.stddev():  " << rs_data.stddev() << endl;
            cout << "rs_data.max():     " << rs_data.max() << endl;
        }
    }
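
    // A minimal usage sketch for test_layer() (my_layer_ is a hypothetical class
    // implementing the setup()/forward()/backward()/get_layer_params() interface used
    // above; it is not defined in this file):
    //
    //     my_layer_ l;
    //     test_layer(l);
    //
    // If the layer's gradients are correct, the printed rs_param and rs_data relative
    // errors should all sit comfortably below the 0.01 threshold used above.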

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_CORE_H_