// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the deep learning tools from the
    dlib C++ Library.  I'm assuming you have already read the introductory
    dnn_mnist_ex.cpp and dnn_mnist_advanced_ex.cpp examples.  In this example we
    are going to show how to create inception networks. 

    An inception network is composed of inception blocks of the form:

               input from SUBNET
              /        |        \
             /         |         \
          block1    block2  ... blockN 
             \         |         /
              \        |        /
          concatenate tensors from blocks
                       |
                    output
                 
    That is, an inception block runs a number of smaller networks (e.g. block1,
    block2) and then concatenates their results.  For further reading refer to:
    Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of
    the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
*/

// The dlib DNN module uses template-based network declarations that lead to very
// long type names.  Visual Studio will produce warning C4503 in such cases.
#ifdef _MSC_VER
#   pragma warning( disable: 4503 )
#endif
#include <dlib/dnn.h>

#include <iostream>
#include <dlib/data_io.h>

using namespace std;
using namespace dlib;

// An inception layer runs several different convolutions on its input.  Here we
// define the blocks as convolutions with different kernel sizes that we will use
// in the inception layer below.
template <typename SUBNET> using block_a1 = relu<con<10,1,1,1,1,SUBNET>>;
template <typename SUBNET> using block_a2 = relu<con<10,3,3,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
template <typename SUBNET> using block_a3 = relu<con<10,5,5,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
template <typename SUBNET> using block_a4 = relu<con<10,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
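// Note that every block above uses a stride of 1 (block_a4 also uses a 3x3 pooling
// window with stride 1).  With dlib's default padding, stride-1 layers produce outputs
// with the same number of rows and columns as their inputs, which is what allows the
// outputs of the different blocks to be concatenated together.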

// Here is the inception layer definition.  It uses the different blocks to process
// its input and returns a combined output.  Dlib includes a number of these
// inceptionN layer types, which are themselves created using concat layers.
template <typename SUBNET> using incept_a = inception4<block_a1,block_a2,block_a3,block_a4, SUBNET>;
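// inception4 runs block_a1 through block_a4 on the same input and concatenates their
// outputs along the channel dimension.  Each block above outputs 10 channels, so
// incept_a produces a tensor with 10+10+10+10 = 40 channels.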

// A network can have inception layers of different structure.  It will work
// properly so long as all the sub-blocks inside a particular inception block
// output tensors with the same number of rows and columns.
template <typename SUBNET> using block_b1 = relu<con<4,1,1,1,1,SUBNET>>;
template <typename SUBNET> using block_b2 = relu<con<4,3,3,1,1,SUBNET>>;
template <typename SUBNET> using block_b3 = relu<con<4,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
template <typename SUBNET> using incept_b = inception3<block_b1,block_b2,block_b3,SUBNET>;

// Now we can define a simple network for classifying MNIST digits.  We will
// train and test this network in the code below.
using net_type = loss_multiclass_log<
        fc<10,
        relu<fc<32,
        max_pool<2,2,2,2,incept_b<
        max_pool<2,2,2,2,incept_a<
        input<matrix<unsigned char>>
        >>>>>>>>;
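// Reading the network from the innermost layer outward: the 28x28 input image goes
// through incept_a (40 channels), is downsampled by a 2x2 max pool, goes through
// incept_b (4+4+4 = 12 channels), is downsampled by another 2x2 max pool, and is then
// classified by a 32 unit fully connected layer with a relu activation, a 10 unit
// fully connected layer, and the multiclass log loss.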

int main(int argc, char** argv) try
{
    // This example is going to run on the MNIST dataset.
    if (argc != 2)
    {
        cout << "This example needs the MNIST dataset to run!" << endl;
        cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl;
        cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl;
        cout << "put them in a folder.  Then give that folder as input to this program." << endl;
        return 1;
    }


    std::vector<matrix<unsigned char>> training_images;
    std::vector<unsigned long>         training_labels;
    std::vector<matrix<unsigned char>> testing_images;
    std::vector<unsigned long>         testing_labels;
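    // load_mnist_dataset() will fill these with the MNIST data.  MNIST consists of
    // 28x28 grayscale images of handwritten digits, so each image is a 28x28
    // matrix<unsigned char> and each label is a digit from 0 to 9.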
    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);


    // Make an instance of our inception network.
    net_type net;
    cout << "The net has " << net.num_layers << " layers in it." << endl;
    cout << net << endl;


    cout << "Traning NN..." << endl;
    dnn_trainer<net_type> trainer(net);
    trainer.set_learning_rate(0.01);
    trainer.set_min_learning_rate(0.00001);
    trainer.set_mini_batch_size(128);
    trainer.be_verbose();
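    // Also ask the trainer to save its state to the file inception_sync every 20
    // seconds.  If this program is killed and restarted, training will resume from the
    // last synchronization point rather than starting over.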
    trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20));
    // Train the network.  This might take a few minutes...
    trainer.train(training_images, training_labels);

    // At this point our net object should have learned how to classify MNIST images.  But
    // before we try it out let's save it to disk.  Note that, since the trainer has been
    // running images through the network, net will have a bunch of state in it related to
    // the last batch of images it processed (e.g. outputs from each layer).  Since we
    // don't care about saving that kind of stuff to disk we can tell the network to forget
    // about that kind of transient data so that our file will be smaller.  We do this by
    // "cleaning" the network before saving it.
    net.clean();
    serialize("mnist_network_inception.dat") << net;
    // Now if we later wanted to recall the network from disk we can simply say:
    // deserialize("mnist_network_inception.dat") >> net;


    // Now let's run the training images through the network.  This statement runs all the
    // images through it and asks the loss layer to convert the network's raw output into
    // labels.  In our case, these labels are the numbers between 0 and 9.
    std::vector<unsigned long> predicted_labels = net(training_images);
    int num_right = 0;
    int num_wrong = 0;
    // And then let's see if it classified them correctly.
    for (size_t i = 0; i < training_images.size(); ++i)
    {
        if (predicted_labels[i] == training_labels[i])
            ++num_right;
        else
            ++num_wrong;
        
    }
    cout << "training num_right: " << num_right << endl;
    cout << "training num_wrong: " << num_wrong << endl;
    cout << "training accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;

    // Let's also see if the network can correctly classify the testing images.
    // Since MNIST is an easy dataset, we should see 99% accuracy.
    predicted_labels = net(testing_images);
    num_right = 0;
    num_wrong = 0;
    for (size_t i = 0; i < testing_images.size(); ++i)
    {
        if (predicted_labels[i] == testing_labels[i])
            ++num_right;
        else
            ++num_wrong;
        
    }
    cout << "testing num_right: " << num_right << endl;
    cout << "testing num_wrong: " << num_wrong << endl;
    cout << "testing accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;

}
catch(std::exception& e)
{
    cout << e.what() << endl;
}