// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#ifndef DLIB_DNn_CORE_H_
#define DLIB_DNn_CORE_H_

#include "core_abstract.h"
#include "tensor.h"
#include <iterator>
#include <memory>
#include <type_traits>
#include "../statistics.h"
#include "../rand.h"
#include "../algs.h"
#include <utility>


namespace dlib
{

// ----------------------------------------------------------------------------------------

    // Tell us if T is one of the special layer types (i.e. add_layer, add_tag_layer, or
    // add_skip_layer).
    template <typename T> struct is_nonloss_layer_type : std::false_type {};
    // Tell us if T is an instance of add_loss_layer.
    template <typename T> struct is_loss_layer_type : std::false_type {};

// ----------------------------------------------------------------------------------------

    inline void randomize_parameters (
        tensor& params,
        unsigned long num_inputs_and_outputs,
        dlib::rand& rnd
    )
    {
        float* data = params.host();
        for (size_t i = 0; i < params.size(); ++i)
        {
            // Draw a random number to initialize the layer according to formula (16)
            // from Understanding the difficulty of training deep feedforward neural
            // networks by Xavier Glorot and Yoshua Bengio.
            float val = 2*rnd.get_random_float()-1;
            val *= std::sqrt(6.0/(num_inputs_and_outputs));

            data[i] = val;
        }
    }
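
    // A minimal usage sketch for randomize_parameters().  The sizes below (a 50x100
    // parameter matrix, so num_inputs_and_outputs = 50+100) are made-up illustration
    // values, not anything required by the library.
    /*!
        example:
            resizable_tensor params;
            params.set_size(50,100);
            dlib::rand rnd;
            randomize_parameters(params, 50+100, rnd);
            // params is now filled with values drawn uniformly from
            // [-sqrt(6/150), +sqrt(6/150)], per Glorot & Bengio.
    !*/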

// ----------------------------------------------------------------------------------------

    template <typename T, size_t N>
    class sstack
    {
    public:
        static_assert(N > 0, "You can't create an empty sstack.");
        typedef T value_type;
        const static size_t num_elements = N;

        sstack() {}
        sstack(const T& item_) : item(item_), data(item_) {}

        const T& top() const { return item; }
        T& top() { return item; }

        size_t size() const { return N; }

        const sstack<T,N-1>& pop() const { return data; }
        sstack<T,N-1>& pop() { return data; }

        friend void serialize(const sstack& item, std::ostream& out)
        {
            serialize(item.top(), out);
            serialize(item.pop(), out);
        }

        friend void deserialize(sstack& item, std::istream& in)
        {
            deserialize(item.top(), in);
            deserialize(item.pop(), in);
        }

    private:
        T item;
        sstack<T,N-1> data;
    };

    template <typename T>
    class sstack<T,1> // base case of recursive definition.
    {
    public:
        sstack() {}
        explicit sstack(const T& item_) : item(item_) {}

        const T& top() const { return item; }
        T& top() { return item; }

        size_t size() const { return 1; }

        friend void serialize(const sstack& item, std::ostream& out)
        {
            serialize(item.top(), out);
        }

        friend void deserialize(sstack& item, std::istream& in)
        {
            deserialize(item.top(), in);
        }

    private:
        T item;
    };
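
    // A short sketch of how sstack is used elsewhere in this file: an
    // sstack<solver,num_layers> holds one solver object per layer, top() refers to the
    // solver for the current layer, and pop() yields the stack for the layers beneath
    // it (this is how add_layer::update() walks down the network).  The int elements
    // below are just stand-ins for real solver objects.
    /*!
        example:
            sstack<int,3> s(0);      // three elements, all initialized to 0
            s.top() = 1;             // element belonging to the top layer
            s.pop().top() = 2;       // element for the next layer down
            s.pop().pop().top() = 3; // element for the bottom layer
    !*/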

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    namespace dimpl
    {
        template <typename T, typename enabled=void>
        class subnet_wrapper
        {
            /*!
                WHAT THIS OBJECT REPRESENTS
                    This is a tool that makes an add_layer or add_loss_layer object
                    expose only the part of its interface defined by the SUBNET
                    type in layers_abstract.h.  This way, when we pass subnetwork
                    objects to the layer callbacks those callbacks won't be able to 
                    interact with the subnetworks in a way other than specified 
                    by the SUBNET interface spec.
            !*/

        public:
            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            subnet_wrapper(T& l_) {}
            // Nothing here because in this case T is one of the input layer types 
            // that doesn't have anything in it.
        };

        template <typename T>
        class subnet_wrapper<T,typename std::enable_if<is_nonloss_layer_type<T>::value>::type>
        {

        public:
            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            typedef T wrapped_type;
            const static size_t num_layers = T::num_layers;

            subnet_wrapper(T& l_) : l(l_),subnetwork(l.subnet()) {}

            const tensor& get_output() const { return l.get_output(); }
            tensor& get_gradient_input() { return l.get_gradient_input(); }

            const subnet_wrapper<typename T::subnet_type>& subnet() const { return subnetwork; }
            subnet_wrapper<typename T::subnet_type>& subnet() { return subnetwork; }

        private:
            T& l;
            subnet_wrapper<typename T::subnet_type> subnetwork;
        };
    }

// ----------------------------------------------------------------------------------------

    template <typename LAYER_DETAILS, typename SUBNET, typename enabled = void>
    class add_layer;

    template <typename T, typename U>
    struct is_nonloss_layer_type<add_layer<T,U>> : std::true_type {};

    template <typename LAYER_DETAILS, typename SUBNET>
    class add_layer<LAYER_DETAILS,SUBNET, 
            typename std::enable_if<is_nonloss_layer_type<SUBNET>::value>::type>
    {
    public:
        typedef LAYER_DETAILS layer_details_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = subnet_type::num_layers + 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;

        add_layer(
        ):
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {
        }

        add_layer(const add_layer&) = default;
        add_layer(add_layer&&) = default;
        add_layer& operator=(add_layer&&) = default;
        add_layer& operator=(const add_layer&) = default;

        template <typename T, typename U, typename E>
        friend class add_layer;

        // Allow copying networks from one to another as long as their corresponding 
        // layers can be constructed from each other.
        template <typename T, typename U, typename E>
        add_layer(
            const add_layer<T,U,E>& item
        ) :
            subnetwork(item.subnet()),
            details(item.layer_details()), 
            this_layer_setup_called(item.this_layer_setup_called),
            gradient_input_is_stale(item.gradient_input_is_stale),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
        }

        template <typename ...T>
        add_layer(
            const LAYER_DETAILS& layer_det, 
            T&& ...args
        ) : 
            details(layer_det), 
            subnetwork(std::forward<T>(args)...),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {
        }

        template <typename ...T>
        add_layer(
            LAYER_DETAILS&& layer_det, 
            T&& ...args
        ) : 
            details(std::move(layer_det)), 
            subnetwork(std::forward<T>(args)...),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return forward(temp_tensor);
        }


        const tensor& operator() (const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        const tensor& forward(const tensor& x)
        {
            subnetwork.forward(x);
            const dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            if (!this_layer_setup_called)
            {
                details.setup(wsub);
                this_layer_setup_called = true;
            }
            details.forward(wsub, cached_output);
            gradient_input_is_stale = true;
            return get_output();
        }

        const tensor& get_output() const { return cached_output; }
        tensor& get_gradient_input() 
        { 
            if (gradient_input_is_stale)
            {
                gradient_input_is_stale = false;
                x_grad.copy_size(get_output());
                x_grad = 0;
            }
            return x_grad; 
        }

        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            params_grad.copy_size(details.get_layer_params());
            params_grad = 0;
            details.backward(get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
            // Don't try to adjust the parameters if this layer doesn't have any.
            if (params_grad.size() != 0)
                solvers.top()(details, static_cast<const tensor&>(params_grad));
            subnetwork.update(x, solvers.pop());
        }

        const subnet_type& subnet() const { return subnetwork; }
        subnet_type& subnet() { return subnetwork; }

        const layer_details_type& layer_details() const { return details; } 
        layer_details_type& layer_details() { return details; } 

        void clean()
        {
            x_grad.clear();
            cached_output.clear();
            params_grad.clear();
            temp_tensor.clear();
            gradient_input_is_stale = true;
            subnetwork.clean();
        }

        friend void serialize(const add_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.subnetwork, out);
            serialize(item.details, out);
            serialize(item.this_layer_setup_called, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }

        friend void deserialize(add_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_layer.");
            deserialize(item.subnetwork, in);
            deserialize(item.details, in);
            deserialize(item.this_layer_setup_called, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

    private:


        subnet_type subnetwork;
        LAYER_DETAILS details;
        bool this_layer_setup_called;
        bool gradient_input_is_stale;
        resizable_tensor x_grad;
        resizable_tensor cached_output; 

        // The following 2 objects don't logically contribute to the state of this class.
        // They are only here to prevent them from being reallocated over and over in
        // member functions.
        resizable_tensor params_grad; 
        resizable_tensor temp_tensor;

    };
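
    // The add_layer template above only requires LAYER_DETAILS to provide the handful
    // of members it actually calls: setup(), forward(), backward(), and
    // get_layer_params() (see layers_abstract.h for the full specification).  The
    // sketch below is a hypothetical, parameter-free pass-through layer written
    // against that interface; it is illustrative only and not part of dlib.
    /*!
        example:
            class example_identity_
            {
            public:
                template <typename SUBNET>
                void setup (const SUBNET&) {}

                template <typename SUBNET>
                void forward(const SUBNET& sub, resizable_tensor& output)
                {
                    output = sub.get_output();
                }

                template <typename SUBNET>
                void backward(const tensor& gradient_input, SUBNET& sub, tensor&)
                {
                    // Layers must ADD their gradient into sub.get_gradient_input()
                    // rather than assign to it.
                    tensor& grad = sub.get_gradient_input();
                    const float* gi = gradient_input.host();
                    float* g = grad.host();
                    for (size_t i = 0; i < grad.size(); ++i)
                        g[i] += gi[i];
                }

                const tensor& get_layer_params() const { return params; }
                tensor& get_layer_params() { return params; }

            private:
                resizable_tensor params;  // empty: this layer has no parameters
            };
    !*/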

// ----------------------------------------------------------------------------------------

// This version of add_layer handles the special case where the subnetwork being given is
// just an input layer object.
    template <typename LAYER_DETAILS, typename INPUT_LAYER, typename enabled>
    class add_layer
    {
    public:
        typedef LAYER_DETAILS layer_details_type;
        typedef INPUT_LAYER subnet_type;
        typedef typename INPUT_LAYER::input_type input_type;
        const static unsigned int sample_expansion_factor = INPUT_LAYER::sample_expansion_factor;
        const static size_t num_layers = 1;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_layer(
        ): 
            this_layer_setup_called(false),
            gradient_input_is_stale(true) 
        {}

        add_layer(const add_layer&) = default;
        add_layer(add_layer&&) = default;
        add_layer& operator=(add_layer&&) = default;
        add_layer& operator=(const add_layer&) = default;

        template <typename T, typename U, typename E>
        friend class add_layer;

        // Allow copying networks from one to another as long as their corresponding 
        // layers can be constructed from each other.
        template <typename T, typename U, typename E>
        add_layer(
            const add_layer<T,U,E>& item
        ):
            input_layer(item.subnet()),
            details(item.layer_details()),
            this_layer_setup_called(item.this_layer_setup_called),
            gradient_input_is_stale(item.gradient_input_is_stale),
            x_grad(item.x_grad),
            cached_output(item.cached_output)
        {
        }

        add_layer(
            const LAYER_DETAILS& layer_det
        ) : 
            details(layer_det), 
            this_layer_setup_called(false),
            gradient_input_is_stale(true) 
        {}

        add_layer(
            LAYER_DETAILS&& layer_det
        ) : 
            details(std::move(layer_det)), 
            this_layer_setup_called(false),
            gradient_input_is_stale(true) 
        {}

        add_layer(
            LAYER_DETAILS layer_det, 
            INPUT_LAYER il
        ) : 
            details(std::move(layer_det)),
            input_layer(std::move(il)),
            this_layer_setup_called(false),
            gradient_input_is_stale(true)
        {}

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            input_layer.to_tensor(ibegin, iend, data);
            // make sure the input layer's to_tensor() function is implemented properly.
            DLIB_CASSERT(std::distance(ibegin,iend)*sample_expansion_factor == data.num_samples(),"");
            data.async_copy_to_device();
        }


        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return forward(temp_tensor);
        }


        const tensor& operator() (const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        const tensor& forward (const tensor& x)
        {
            DLIB_CASSERT(x.num_samples()%sample_expansion_factor == 0,"");
            subnet_wrapper wsub(x, grad_final_ignored);
            if (!this_layer_setup_called)
            {
                details.setup(wsub);
                this_layer_setup_called = true;
            }
            details.forward(wsub, cached_output);
            gradient_input_is_stale = true;
            return get_output();
        }

        const tensor& get_output() const { return cached_output; }
        tensor& get_gradient_input() 
        { 
            if (gradient_input_is_stale)
            {
                gradient_input_is_stale = false;
                x_grad.copy_size(get_output());
                x_grad = 0;
            }
            return x_grad; 
        }


        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            subnet_wrapper wsub(x, grad_final_ignored);
            params_grad.copy_size(details.get_layer_params());
            params_grad = 0;
            details.backward(get_gradient_input(), wsub, static_cast<tensor&>(params_grad));
            // Don't try to adjust the parameters if this layer doesn't have any.
            if (params_grad.size() != 0)
                solvers.top()(details, static_cast<const tensor&>(params_grad));
        }

        const subnet_type& subnet() const { return input_layer; } 
        subnet_type& subnet() { return input_layer; } 

        const layer_details_type& layer_details() const { return details; } 
        layer_details_type& layer_details() { return details; } 

        void clean()
        {
            x_grad.clear();
            grad_final_ignored.clear();
            cached_output.clear();
            params_grad.clear();
            temp_tensor.clear();
            gradient_input_is_stale = true;
        }

        friend void serialize(const add_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.input_layer, out);
            serialize(item.details, out);
            serialize(item.this_layer_setup_called, out);
            serialize(item.gradient_input_is_stale, out);
            serialize(item.x_grad, out);
            serialize(item.cached_output, out);
        }

        friend void deserialize(add_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_layer.");
            deserialize(item.input_layer, in);
            deserialize(item.details, in);
            deserialize(item.this_layer_setup_called, in);
            deserialize(item.gradient_input_is_stale, in);
            deserialize(item.x_grad, in);
            deserialize(item.cached_output, in);
        }

    private:

        class subnet_wrapper
        {
        public:
            subnet_wrapper(const tensor& x_, resizable_tensor& grad_final_ignored_) :
                x(x_), grad_final_ignored(grad_final_ignored_) {}

            subnet_wrapper(const subnet_wrapper&) = delete;
            subnet_wrapper& operator=(const subnet_wrapper&) = delete;

            const tensor& get_output() const { return x; }
            tensor& get_gradient_input() 
            { 
                // It doesn't matter what values are in this tensor but client code will
                // always assume it's the same dimension as the output so make sure that is
                // the case.  Note that we do set it to a non-crazy value though to avoid
                // it being full of NaN and slowing the processing down.
                if (!have_same_dimensions(x, grad_final_ignored))
                {
                    grad_final_ignored.copy_size(x);
                    grad_final_ignored = 0;  
                }
                return grad_final_ignored; 
            }

        private:
            const tensor& x;
            resizable_tensor& grad_final_ignored;
        };

        subnet_type input_layer;
        LAYER_DETAILS details;
        bool this_layer_setup_called;
        bool gradient_input_is_stale;
        resizable_tensor x_grad; 
        resizable_tensor cached_output; 

        // The following 3 objects don't logically contribute to the state of this class.
        // They are only here to prevent them from being reallocated over and over in
        // member functions.
        resizable_tensor params_grad; 
        resizable_tensor temp_tensor; 
        resizable_tensor grad_final_ignored;
    };

// ----------------------------------------------------------------------------------------

    template <unsigned long ID, typename SUBNET, typename enabled=void>
    class add_tag_layer;

    template <unsigned long ID, typename SUBNET>
    class add_tag_layer<ID,SUBNET,
            typename std::enable_if<is_nonloss_layer_type<SUBNET>::value>::type>
    {
    public:
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = subnet_type::num_layers + 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_tag_layer() = default;
        add_tag_layer(const add_tag_layer&) = default;
        add_tag_layer(add_tag_layer&&) = default;
        add_tag_layer& operator=(add_tag_layer&&) = default;
        add_tag_layer& operator=(const add_tag_layer&) = default;

        template <typename T>
        add_tag_layer(
            const add_tag_layer<ID,T>& item
        ) : subnetwork(item.subnet())
        {}

        template <typename ...T>
        add_tag_layer(
            T ...args
        ) : 
            subnetwork(std::move(args)...) 
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            return subnetwork(ibegin,iend);
        }

        const tensor& operator() (const input_type& x)
        {
            return subnetwork(x);
        }

        const tensor& forward(const tensor& x)
        {
            return subnetwork.forward(x);
        }

        const tensor& get_output() const { return subnetwork.get_output(); }

        tensor& get_gradient_input() 
        { 
            return subnetwork.get_gradient_input();
        }

        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            subnetwork.update(x,solvers.pop());
        }

        const subnet_type& subnet() const { return subnetwork; }
        subnet_type& subnet() { return subnetwork; }

        void clean()
        {
            subnetwork.clean();
        }

        friend void serialize(const add_tag_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.subnetwork, out);
        }

        friend void deserialize(add_tag_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_tag_layer.");
            deserialize(item.subnetwork, in);
        }

    private:

        subnet_type subnetwork;
    };

// ----------------------------------------------------------------------------------------

// This version of add_tag_layer handles the special case where the subnetwork being given
// is just an input layer object.
    template <unsigned long ID, typename INPUT_LAYER, typename enabled>
    class add_tag_layer
    {
    public:
        typedef INPUT_LAYER subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_tag_layer() = default;
        add_tag_layer(const add_tag_layer&) = default;
        add_tag_layer(add_tag_layer&&) = default;
        add_tag_layer& operator=(add_tag_layer&&) = default;
        add_tag_layer& operator=(const add_tag_layer&) = default;

        template <typename T, typename E>
        add_tag_layer(
            const add_tag_layer<ID,T,E>& item
        ) : input_layer(item.subnet())
        {}

        template <typename ...T>
        add_tag_layer(
            T ...args
        ) : 
            input_layer(std::move(args)...) 
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            input_layer.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin, 
            input_iterator iend
        )
        {
            input_layer.to_tensor(ibegin,iend,cached_output);
            return get_output();
        }

        const tensor& operator() (const input_type& x)
        {
            return (*this)(&x, &x+1);
        }

        const tensor& forward(const tensor& x)
        {
            cached_output = x;
            return get_output();
        }

        const tensor& get_output() const 
        { 
            return cached_output; 
        }

        tensor& get_gradient_input() 
        { 
            if (!have_same_dimensions(cached_output, grad_final_ignored))
            {
                grad_final_ignored.copy_size(get_output());
                grad_final_ignored = 0;
            }
            return grad_final_ignored; 
        }

        template <typename solver_type>
        void update(const tensor& /*x*/, sstack<solver_type,num_layers>& /*solvers*/)
        {
            // nothing to update
        }

        const subnet_type& subnet() const { return input_layer; }
        subnet_type& subnet() { return input_layer; }

        void clean()
        {
            grad_final_ignored.clear();
            cached_output.clear();
        }

        friend void serialize(const add_tag_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.input_layer, out);
            serialize(item.cached_output, out);
            serialize(item.grad_final_ignored, out);
        }

        friend void deserialize(add_tag_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_tag_layer.");
            deserialize(item.input_layer, in);
            deserialize(item.cached_output, in);
            deserialize(item.grad_final_ignored, in);
        }

    private:

        subnet_type input_layer;
        resizable_tensor cached_output;
        resizable_tensor grad_final_ignored;
    };

    template <unsigned long ID, typename U, typename E>
    struct is_nonloss_layer_type<add_tag_layer<ID,U,E>> : std::true_type {};


// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <typename LOSS_DETAILS, typename SUBNET>
    class add_loss_layer;

    class no_label_type
    {
    private:
        // We don't want anyone making these no_label_type objects.  They are here only to
        // allow add_loss_layer::label_type and dnn_trainer::label_type to exist which avoids
        // needing to overload add_loss_layer and dnn_trainer for supervised and unsupervised
        // losses.  It also can be a type to use in template metaprogramming to indicate
        // "no label".  So here we make the constructor private with the exception that
        // add_loss_layer objects can make it (again, just to simplify add_loss_layer's
        // implementation).
        no_label_type()=default;
        template <typename LOSS_DETAILS, typename SUBNET> friend class add_loss_layer;
    };

// ----------------------------------------------------------------------------------------

    template <typename LOSS_DETAILS, typename SUBNET>
    class add_loss_layer
    {
        template <typename T, typename enabled=void>
        struct get_loss_layer_label_type
        {
            typedef no_label_type type;
        };
        template <typename T>
        struct get_loss_layer_label_type<T,typename std::enable_if<sizeof(typename T::label_type)!=0>::type>
        {
            typedef typename T::label_type type;
        };

    public:
        typedef LOSS_DETAILS loss_details_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        // Note that the loss layer doesn't count as an additional layer.
        const static size_t num_layers = subnet_type::num_layers;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        typedef typename get_loss_layer_label_type<LOSS_DETAILS>::type label_type;

        static_assert(is_nonloss_layer_type<SUBNET>::value, 
            "SUBNET must be of type add_layer, add_skip_layer, or add_tag_layer."); 
        static_assert(sample_expansion_factor == LOSS_DETAILS::sample_expansion_factor,
            "The loss layer and input layer must agree on the sample_expansion_factor.");


        add_loss_layer() = default;
        add_loss_layer(const add_loss_layer&) = default;
        add_loss_layer(add_loss_layer&&) = default;
        add_loss_layer& operator=(add_loss_layer&&) = default;
        add_loss_layer& operator=(const add_loss_layer&) = default;

        template <typename T, typename U>
        add_loss_layer(
            const add_loss_layer<T,U>& item
        ) : 
            loss(item.loss_details()),
            subnetwork(item.subnet())
        {}

        template <typename ...T>
        add_loss_layer(
            const LOSS_DETAILS& layer_det, 
            T&& ...args
        ) : 
            loss(layer_det), 
            subnetwork(std::forward<T>(args)...)
        {
        }

        template <typename ...T>
        add_loss_layer(
            LOSS_DETAILS&& layer_det, 
            T&& ...args
        ) : 
            loss(std::move(layer_det)), 
            subnetwork(std::forward<T>(args)...)
        {
        }

        template <typename ...T>
        add_loss_layer(
            T ...args
        ) : 
            subnetwork(std::move(args)...)
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename output_iterator>
        void operator() (
            const tensor& x, 
            output_iterator obegin
        )
        {
            subnetwork.forward(x);
            const dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            loss.to_label(wsub, obegin);
        }

        template <typename input_iterator, typename output_iterator>
        void operator() (
            input_iterator ibegin,
            input_iterator iend,
            output_iterator obegin
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            (*this)(temp_tensor, obegin);
        }

        const label_type& operator() (const input_type& x)
        {
            (*this)(&x, &x+1, &temp_label);
            return temp_label;
        }

        template <typename label_iterator>
        double compute_loss (
            const tensor& x,
            label_iterator lbegin 
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            return loss.compute_loss(x, lbegin, wsub);
        }

        template <typename input_iterator, typename label_iterator>
        double compute_loss (
            input_iterator ibegin,
            input_iterator iend,
            label_iterator lbegin 
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return compute_loss(temp_tensor, lbegin);
        }

        double compute_loss (
            const tensor& x
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            return loss.compute_loss(x, wsub);
        }

        template <typename input_iterator>
        double compute_loss (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return compute_loss(temp_tensor);
        }

        template <typename label_iterator, typename solver_type>
        double update (
            const tensor& x,
            label_iterator lbegin,
            sstack<solver_type,num_layers>& solvers
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            double l = loss.compute_loss(x, lbegin, wsub);
            subnetwork.update(x, solvers);
            return l;
        }

        template <typename input_iterator, typename label_iterator, typename solver_type>
        double update (
            input_iterator ibegin,
            input_iterator iend,
            label_iterator lbegin,
            sstack<solver_type,num_layers>& solvers
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return update(temp_tensor, lbegin, solvers);
        }

        template <typename solver_type>
        double update (
            const tensor& x,
            sstack<solver_type,num_layers>& solvers
        )
        {
            subnetwork.forward(x);
            dimpl::subnet_wrapper<subnet_type> wsub(subnetwork);
            double l = loss.compute_loss(x, wsub);
            subnetwork.update(x, solvers);
            return l;
        }

        template <typename input_iterator, typename solver_type>
        double update (
            input_iterator ibegin,
            input_iterator iend,
            sstack<solver_type,num_layers>& solvers
        )
        {
            to_tensor(ibegin,iend,temp_tensor);
            return update(temp_tensor, solvers);
        }

        const subnet_type& subnet() const { return subnetwork; }
        subnet_type& subnet() { return subnetwork; }
        const loss_details_type& loss_details() const { return loss; }
        loss_details_type& loss_details() { return loss; }

        void clean (
        )
        {
            temp_tensor.clear();
            subnetwork.clean();
        }

        friend void serialize(const add_loss_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.loss, out);
            serialize(item.subnetwork, out);
        }

        friend void deserialize(add_loss_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_loss_layer.");
            deserialize(item.loss, in);
            deserialize(item.subnetwork, in);
        }

    private:

        loss_details_type loss;
        subnet_type subnetwork;

        // These two objects don't logically contribute to the state of this object.  They
        // are here to prevent them from being reallocated over and over.
        label_type temp_label;
        resizable_tensor temp_tensor;
    };
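
    // A sketch of how an add_loss_layer network is typically driven.  Here "net_type"
    // stands for some concrete network type built from add_loss_layer, "my_solver" for
    // any solver type usable with sstack, and "samples"/"labels" for containers of
    // input_type/label_type objects; none of these are defined in this file.
    /*!
        example:
            net_type net;
            sstack<my_solver, net_type::num_layers> solvers;

            // one gradient update over a mini-batch:
            double loss = net.update(samples.begin(), samples.end(), labels.begin(), solvers);

            // evaluate the network on a single sample:
            auto predicted = net(samples[0]);
    !*/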


    template <typename T, typename U>
    struct is_loss_layer_type<add_loss_layer<T,U>> : std::true_type {};

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    namespace impl
    {
        template <unsigned int i, typename T>
        struct layer_helper
        {
            static T& makeT();
            using next_type = typename std::remove_reference<decltype(makeT().subnet())>::type;
            using type = typename layer_helper<i-1,next_type>::type;
            static type& layer(T& n)
            {
                return layer_helper<i-1,next_type>::layer(n.subnet());
            }
        };
        template <typename T>
        struct layer_helper<0,T>
        {
            using type = T;
            static type& layer(T& n)
            {
                return n;
            }
        };

        template <template<typename> class Match, typename T, unsigned int i, typename enabled = void>
        struct layer_helper_match
        {
            static T& makeT();
            using next_type = typename std::remove_reference<decltype(makeT().subnet())>::type;
            using type = typename layer_helper_match<Match,next_type,i>::type;
            static type& layer(T& n)
            {
                return layer_helper_match<Match,next_type,i>::layer(n.subnet());
            }
        };
        // This overload catches add_layer and add_loss_layer templates.
        template <template<typename> class Match, typename T, unsigned int i>
        struct layer_helper_match<Match,T,i,
            typename std::enable_if<std::is_same<const T,const  Match<typename T::subnet_type>>::value>::type>
        {
            using type = typename layer_helper<i,T>::type;
            static type& layer(T& n)
            {
                return layer_helper<i,T>::layer(n);
            }
        };
        // This overload catches input templates.
        template <template<typename> class Match, typename T, unsigned int i>
        struct layer_helper_match<Match,T,i,
            typename std::enable_if<std::is_same<const T,const  Match<typename T::input_type>>::value>::type>
        {
            using type = typename layer_helper<i,T>::type;
            static type& layer(T& n)
            {
                return layer_helper<i,T>::layer(n);
            }
        };
        // This overload catches subnet_wrapper templates.
        template <template<typename> class Match, typename T, unsigned int i>
        struct layer_helper_match<Match,T,i,
            typename std::enable_if<std::is_same<const typename T::wrapped_type, 
                                                 const Match<typename T::wrapped_type::subnet_type>>::value>::type>
        {
            using type = typename layer_helper<i,T>::type;
            static type& layer(T& n)
            {
                return layer_helper<i,T>::layer(n);
            }
        };
    }

    template <unsigned int i, typename T>
    typename impl::layer_helper<i,T>::type& layer (T& n) 
    {
        return impl::layer_helper<i,T>::layer(n);
    }

    template <template<typename> class Match, typename T>
    typename impl::layer_helper_match<Match,T,0>::type& layer (T& n) 
    {
        return impl::layer_helper_match<Match,T,0>::layer(n);
    }

    template <template<typename> class Match, unsigned int i, typename T>
    typename impl::layer_helper_match<Match,T,i>::type& layer (T& n) 
    {
        return impl::layer_helper_match<Match,T,i>::layer(n);
    }
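
    // A few examples of how the layer() accessors above are used.  Here "net" stands
    // for some network object built from the templates in this file; it is not defined
    // here.  (tag1 is one of the tag templates defined further below.)
    /*!
        example:
            layer<0>(net);      // the whole network, i.e. net itself
            layer<2>(net);      // the network with the top two layers stripped off
            layer<tag1>(net);   // the sub-network rooted at the layer tagged with tag1
            layer<tag1,1>(net); // one layer below the tag1 layer
    !*/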

// ----------------------------------------------------------------------------------------

    template <template<typename> class TAG_TYPE, typename SUBNET>
    class add_skip_layer
    {
    public:
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_layers = subnet_type::num_layers + 1;
        const static unsigned int sample_expansion_factor = subnet_type::sample_expansion_factor;
        static_assert(sample_expansion_factor >= 1,
            "The input layer can't produce fewer output tensors than there are inputs.");

        add_skip_layer() = default;
        add_skip_layer(const add_skip_layer&) = default;
        add_skip_layer(add_skip_layer&&) = default;
        add_skip_layer& operator=(add_skip_layer&&) = default;
        add_skip_layer& operator=(const add_skip_layer&) = default;

        template <typename T>
        add_skip_layer(
            const add_skip_layer<TAG_TYPE,T>& item
        ) : subnetwork(item.subnet())
        {}

        template <typename ...T>
        add_skip_layer(
            T ...args
        ) : 
            subnetwork(std::move(args)...) 
        {
        }

        template <typename input_iterator>
        void to_tensor (
            input_iterator ibegin,
            input_iterator iend,
            resizable_tensor& data
        ) const
        {
            subnetwork.to_tensor(ibegin,iend,data);
        }

        template <typename input_iterator>
        const tensor& operator() (
            input_iterator ibegin,
            input_iterator iend
        )
        {
            subnetwork(ibegin,iend);
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        const tensor& operator() (const input_type& x)
        {
            subnetwork(x);
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        const tensor& forward(const tensor& x)
        {
            subnetwork.forward(x);
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        const tensor& get_output() const 
        { 
            return layer<TAG_TYPE>(subnetwork).get_output();
        }

        tensor& get_gradient_input() 
        { 
            return layer<TAG_TYPE>(subnetwork).get_gradient_input();
        }

        template <typename solver_type>
        void update(const tensor& x, sstack<solver_type,num_layers>& solvers)
        {
            subnetwork.update(x,solvers.pop());
        }

        const subnet_type& subnet() const 
        { 
            return subnetwork; 
        }

        subnet_type& subnet() 
        { 
            return subnetwork; 
        }

        void clean()
        {
            subnetwork.clean();
        }

        friend void serialize(const add_skip_layer& item, std::ostream& out)
        {
            int version = 1;
            serialize(version, out);
            serialize(item.subnetwork, out);
        }

        friend void deserialize(add_skip_layer& item, std::istream& in)
        {
            int version = 0;
            deserialize(version, in);
            if (version != 1)
                throw serialization_error("Unexpected version found while deserializing dlib::add_skip_layer.");
            deserialize(item.subnetwork, in);
        }

    private:

        subnet_type subnetwork;
    };
    template <template<typename> class T, typename U>
    struct is_nonloss_layer_type<add_skip_layer<T,U>> : std::true_type {};

    template <typename SUBNET> using tag1  = add_tag_layer< 1, SUBNET>;
    template <typename SUBNET> using tag2  = add_tag_layer< 2, SUBNET>;
    template <typename SUBNET> using tag3  = add_tag_layer< 3, SUBNET>;
    template <typename SUBNET> using tag4  = add_tag_layer< 4, SUBNET>;
    template <typename SUBNET> using tag5  = add_tag_layer< 5, SUBNET>;
    template <typename SUBNET> using tag6  = add_tag_layer< 6, SUBNET>;
    template <typename SUBNET> using tag7  = add_tag_layer< 7, SUBNET>;
    template <typename SUBNET> using tag8  = add_tag_layer< 8, SUBNET>;
    template <typename SUBNET> using tag9  = add_tag_layer< 9, SUBNET>;
    template <typename SUBNET> using tag10 = add_tag_layer<10, SUBNET>;

    template <typename SUBNET> using skip1  = add_skip_layer< tag1, SUBNET>;
    template <typename SUBNET> using skip2  = add_skip_layer< tag2, SUBNET>;
    template <typename SUBNET> using skip3  = add_skip_layer< tag3, SUBNET>;
    template <typename SUBNET> using skip4  = add_skip_layer< tag4, SUBNET>;
    template <typename SUBNET> using skip5  = add_skip_layer< tag5, SUBNET>;
    template <typename SUBNET> using skip6  = add_skip_layer< tag6, SUBNET>;
    template <typename SUBNET> using skip7  = add_skip_layer< tag7, SUBNET>;
    template <typename SUBNET> using skip8  = add_skip_layer< tag8, SUBNET>;
    template <typename SUBNET> using skip9  = add_skip_layer< tag9, SUBNET>;
    template <typename SUBNET> using skip10 = add_skip_layer<tag10, SUBNET>;
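
    // A sketch of how the tag and skip templates combine: add_tag_layer gives a point
    // in the network a name, and a later add_skip_layer routes the output of that
    // tagged layer forward again, which is how branching/skip connections are
    // expressed.  In the alias below, layer_a and layer_b are hypothetical
    // single-argument layer templates and my_input is a hypothetical input layer type;
    // real layer templates live in layers.h and input.h.
    /*!
        example:
            // The output of the skip1<...> part is the output of whatever layer was
            // tagged with tag1 further down the network.
            using net_type = layer_b<skip1<layer_a<tag1<my_input>>>>;
    !*/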

// ----------------------------------------------------------------------------------------

    namespace timpl
    {
        inline void fill_with_gaussian_random_numbers (
            tensor& t,
            dlib::rand& rnd,
            double sigma = 1
        )
        {
            float* data = t.host();
            for (size_t i = 0; i < t.size(); ++i)
                data[i] = rnd.get_random_gaussian()*sigma;
        }

        class test_layer_subnet 
        {
        public:
            test_layer_subnet (
                dlib::rand& rnd_
            ) : rnd(rnd_) 
            {
                // Output and gradient_input have to have the same dimensions in each
                // layer.
                const long num_samples = rnd.get_random_32bit_number()%4+3;
                const long nr = rnd.get_random_32bit_number()%4+2;
                const long nc = rnd.get_random_32bit_number()%4+2;
                const long k  = rnd.get_random_32bit_number()%4+2;

                output.set_size(num_samples, nr, nc, k);
                gradient_input.set_size(num_samples, nr, nc, k);

                // Use a non-zero initial gradient to make sure the layers add to it
                // rather than assign and blow away the initial value.
                fill_with_gassuan_random_numbers(gradient_input, rnd, 0.01);

                fill_with_gaussian_random_numbers(output, rnd);
            }


            const tensor& get_output() const { return output; }
            const test_layer_subnet& subnet() const { init_sub(); return *subnetwork; }

            tensor& get_gradient_input() { return gradient_input; }
            test_layer_subnet& subnet() { init_sub(); return *subnetwork; }



            unsigned long count_outputs() const
            {
                if (subnetwork)
                    return subnetwork->count_outputs() + output.size();
                else
                    return output.size();
            }

            float& get_output_element(unsigned long i)
            {
                if (i < output.size())
                    return output.host()[i];
                else
                    return subnet().get_output_element(i-output.size());
            }

            float get_gradient_input_element(unsigned long i) const
            {
                if (i < gradient_input.size())
                    return gradient_input.host()[i];
                else
                    return subnet().get_gradient_input_element(i-gradient_input.size());
            }


        private:
            // We lazily initialize sub-layers as needed when someone tries to call
            // subnet()
            void init_sub() const
            {
                if (!subnetwork)
                    subnetwork.reset(new test_layer_subnet(rnd));
            }

            dlib::rand& rnd;
            mutable std::unique_ptr<test_layer_subnet> subnetwork;
            resizable_tensor output;
            resizable_tensor gradient_input;
        };


        inline void print_tensor(
            const tensor& a
        )
        {
            auto data = a.host();
            for (size_t i = 0; i < a.size(); ++i)
                std::cout << data[i] << " ";
            std::cout << std::endl;
        }
    }

    template <
        typename layer_details_type
        >
    void test_layer (
        layer_details_type l
    )
    {
        const float base_eps = 0.01;
        using namespace timpl;
        // Do some setup
        dlib::rand rnd;
        test_layer_subnet subnetwork(rnd);
        resizable_tensor output, out2, out3;
        // Run setup() and forward() as well to make sure any calls to subnet() have
        // happened before we start assuming we know how many data elements there are
        // (since we do a lazy layer creation thing based on calls to subnet() inside
        // test_layer_subnet).
        l.setup(subnetwork);
        l.forward(subnetwork, output);

        resizable_tensor input_grad;
        input_grad.copy_size(output);
        std::cout << "output.num_samples(): "<< output.num_samples() << std::endl;
        fill_with_gaussian_random_numbers(input_grad, rnd);

        // The f() we are computing gradients of is f(data,params) = dot(output, input_grad).
        // Its value at the current parameter and data values is:
        std::cout << "f(data,params): " << dot(output, input_grad) << std::endl;

        // We are going to save a copy of the subnetwork.get_gradient_input() data before we do
        // backpropagation since the backward() function is supposed to *add* to the
        // gradients rather than overwrite them.  We will use this saved data to check if
        // that is the case.
        const unsigned long num_data_inputs = subnetwork.count_outputs();
        std::vector<float> initial_gradient_input(num_data_inputs);
        for (unsigned long i = 0; i < num_data_inputs; ++i)
            initial_gradient_input[i] = subnetwork.get_gradient_input_element(i);


        // Now tell the layer to compute all the gradients.  In the rest of this function
        // we will just be checking that these gradients were computed correctly by
        // comparing them to a central differences approximation.
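        // Concretely, letting f(x) = dot(forward(x), input_grad) (a scalar), each
        // analytic derivative produced by backward() is compared against the symmetric
        // estimate
        //     df/dx_i  is approximately  ( f(x_i + eps) - f(x_i - eps) ) / (2*eps)
        // which is exactly what the two loops below compute, first for the layer
        // parameters and then for the input data.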
        resizable_tensor params_grad, random_noise;
        params_grad.copy_size(l.get_layer_params());
        random_noise.copy_size(l.get_layer_params());
        randomize_parameters(random_noise, 5, rnd);
        params_grad = random_noise;
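        // Note that params_grad is seeded with random_noise (rather than zeros) so we
        // can verify below that backward() *adds* its parameter gradient into
        // params_grad instead of overwriting it; the noise is subtracted back out when
        // output_derivative is computed.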
        l.backward(input_grad, subnetwork, params_grad);

        running_stats<double> rs_param, rs_data;

        // ==================================================================
        // first validate the way the parameter gradients are computed
        for (long i = 0; i < params_grad.size(); ++i)
        {
            layer_details_type l1(l);

            float eps = l1.get_layer_params().host()[i]*base_eps;
            if (eps == 0)
                eps = base_eps;
            const float oldval = l1.get_layer_params().host()[i];
            l1.get_layer_params().host()[i] = oldval+eps;
            l1.forward(subnetwork, out2);
            l1.get_layer_params().host()[i] = oldval-eps;
            l1.forward(subnetwork, out3);

            // Compute a reference derivative via a central differences approximation and
            // compare it to the one output by the layer and make sure they match.
            double reference_derivative = (dot(out2,input_grad)-dot(out3, input_grad))/(2*eps);
            double output_derivative = params_grad.host()[i]-random_noise.host()[i];
            double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
            if (std::abs(relative_error) > 0.01)
            {
                using namespace std;
                cout << "PARAM ERROR: "<< relative_error << endl;
                cout << "   reference_derivative:   " << reference_derivative << endl;
                cout << "   output_derivative: " << output_derivative << endl;
            }

            rs_param.add(std::abs(relative_error));
        }

        // ==================================================================
        // now validate the data gradients
        for (unsigned long i = 0; i < num_data_inputs; ++i)
        {
            const float oldval = subnetwork.get_output_element(i);
            float eps = oldval*base_eps;
            if (eps == 0)
                eps = base_eps;
            subnetwork.get_output_element(i) = oldval+eps;
            l.forward(subnetwork, out2);
            subnetwork.get_output_element(i) = oldval-eps;
            l.forward(subnetwork, out3);

            // Compute a reference derivative via a central differences approximation and
            // compare it to the one output by the layer and make sure they match.
            double reference_derivative = (dot(out2,input_grad)-dot(out3, input_grad))/(2*eps);
            double output_derivative = subnetwork.get_gradient_input_element(i)-initial_gradient_input[i];
            double relative_error = (reference_derivative - output_derivative)/(reference_derivative + 1e-100);
            if (std::abs(relative_error) > 0.01)
            {
                using namespace std;
                cout << "DATA ERROR: "<< relative_error << endl;
                cout << "   reference_derivative:   " << reference_derivative << endl;
                cout << "   output_derivative: " << output_derivative << endl;
            }
            rs_data.add(std::abs(relative_error));
        }

        using namespace std;
        if (rs_param.current_n() > 1)
        {
            cout << "rs_param.mean():   " << rs_param.mean() << endl;
            cout << "rs_param.stddev(): " << rs_param.stddev() << endl;
            cout << "rs_param.max():    " << rs_param.max() << endl;
        }
        if (rs_data.current_n() > 1)
        {
            cout << "rs_data.mean():    " << rs_data.mean() << endl;
            cout << "rs_data.stddev():  " << rs_data.stddev() << endl;
            cout << "rs_data.max():     " << rs_data.max() << endl;
        }
    }
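
    // A minimal sketch of how test_layer() might be invoked (illustrative only;
    // my_layer_ is a hypothetical layer class implementing the setup()/forward()/
    // backward()/get_layer_params() interface exercised above):
    //
    //     my_layer_ l;
    //     test_layer(l);
    //
    // If the layer's gradients are implemented correctly, the relative errors printed
    // by this function should all be small (well under the 0.01 threshold used above).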

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_CORE_H_