"vscode:/vscode.git/clone" did not exist on "8ccc76ab3760cdb1ab60c7a344e16f118bb58adc"
dnn_inception_ex.cpp 7.52 KB
Newer Older
Fm's avatar
Fm committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the deep learning tools from the
    dlib C++ Library.  I'm assuming you have already read the dnn_mnist_ex.cpp
    example.  So in this example program I'm going to go over a number of more
    advanced parts of the API, including:
        - Using the inception layer tools (inception3/inception4) to construct inception layers

    An inception layer is a kind of NN architecture that runs several types of
    convolution on the same input area and joins all of the convolution results into
    one output.  For further reading, refer to http://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf
*/

#include <dlib/dnn.h>
#include <iostream>
#include <dlib/data_io.h>

using namespace std;
using namespace dlib;

// An inception layer runs several different convolutions on the same input.
// Here we define the blocks, convolutions with different kernel sizes, that we will use in
// the inception layer below.
template <typename SUBNET> using block_a1 = relu<con<10,1,1,1,1,SUBNET>>;
template <typename SUBNET> using block_a2 = relu<con<10,3,3,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
template <typename SUBNET> using block_a3 = relu<con<10,5,5,1,1,relu<con<16,1,1,1,1,SUBNET>>>>;
template <typename SUBNET> using block_a4 = relu<con<10,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;
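// Taken together, these four blocks mirror the branches of a GoogLeNet style inception
// module: a 1x1 convolution, a 1x1 convolution followed by a 3x3 convolution, a 1x1
// convolution followed by a 5x5 convolution, and a 3x3 max pooling followed by a 1x1
// convolution.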

// Here is the inception layer definition. It runs the blocks above on the same input and returns their combined output.
template <typename SUBNET> using incept_a = inception4<block_a1,block_a2,block_a3,block_a4, SUBNET>;
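// The inception4<> template runs all four blocks on the same input and concatenates
// their outputs along the channel (depth) dimension, so every block must produce an
// output with the same number of rows and columns.  With the blocks above, each branch
// outputs 10 channels, giving incept_a a 40 channel output.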

// A network can contain inception layers with different structures.
// Here are blocks with different convolutions for a second inception layer.
template <typename SUBNET> using block_b1 = relu<con<4,1,1,1,1,SUBNET>>;
template <typename SUBNET> using block_b2 = relu<con<4,3,3,1,1,SUBNET>>;
template <typename SUBNET> using block_b3 = relu<con<4,1,1,1,1,max_pool<3,3,1,1,SUBNET>>>;

// Here is the second inception layer definition. It uses three blocks to process the input and returns their combined output.
template <typename SUBNET> using incept_b = inception3<block_b1,block_b2,block_b3,SUBNET>;

// and then the network type is
using net_type = loss_multiclass_log<
        fc<10,
        relu<fc<32,
        max_pool<2,2,2,2,incept_b<
        max_pool<2,2,2,2,tag1<incept_a<
        input<matrix<unsigned char>>
        >>>>>>>>>;
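// Reading the network definition from the bottom up: the input image goes through the
// first inception layer (incept_a, wrapped in tag1 so we can find it later), a 2x2 max
// pool, the second inception layer (incept_b), another 2x2 max pool, a fully connected
// layer with 32 outputs followed by a relu, and finally a fully connected layer with 10
// outputs feeding the multiclass log loss, one output per digit class.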

int main(int argc, char** argv) try
{
    // This example is going to run on the MNIST dataset.
    if (argc != 2)
    {
        cout << "This example needs the MNIST dataset to run!" << endl;
        cout << "You can get MNIST from http://yann.lecun.com/exdb/mnist/" << endl;
        cout << "Download the 4 files that comprise the dataset, decompress them, and" << endl;
        cout << "put them in a folder.  Then give that folder as input to this program." << endl;
        return 1;
    }


    std::vector<matrix<unsigned char>> training_images;
    std::vector<unsigned long>         training_labels;
    std::vector<matrix<unsigned char>> testing_images;
    std::vector<unsigned long>         testing_labels;
    load_mnist_dataset(argv[1], training_images, training_labels, testing_images, testing_labels);
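    // load_mnist_dataset() fills these vectors with the 60,000 training images and
    // 10,000 testing images of MNIST, each a 28x28 grayscale picture of a handwritten
    // digit, along with their labels (0 to 9).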


    // Create a network of the type defined above.
    net_type net;

    // Now let's print the details of the net to the screen and inspect it.
    cout << "The net has " << net.num_layers << " layers in it." << endl;
    cout << net << endl;

    // We can access inner layers with the layer<> function:
    // with tags
    auto& in_a = layer<tag1>(net);
    cout << "Found inception A layer: " << endl << in_a << endl;
    // and we can access the blocks inside an inception layer with itags
    auto& in_a_1 = layer<itag1>(in_a);
    cout << "Found inception A/1 block: " << endl << in_a_1 << endl;
    // which is identical to
    auto& in_a_1_alt = layer<tag1,2>(net);
    cout << "Found inception A/1 block another way: " << endl << in_a_1_alt << endl;

    cout << "Traning NN..." << endl;
    // The rest of the sample is identical to dnn_minst_ex
    // And then train it using the MNIST data.  The code below uses mini-batch stochastic
    // gradient descent with an initial learning rate of 0.01 to accomplish this.
    dnn_trainer<net_type> trainer(net);
    trainer.set_learning_rate(0.01);
    trainer.set_min_learning_rate(0.00001);
    trainer.set_mini_batch_size(128);
    trainer.be_verbose();
    // Since DNN training can take a long time, we can ask the trainer to save its state to
    // a file named "inception_sync" every 20 seconds.  This way, if we kill this program and
    // start it again it will begin where it left off rather than restarting the training
    // from scratch.  This is because, when the program restarts, this call to
    // set_synchronization_file() will automatically reload the settings from inception_sync if
    // the file exists.
    trainer.set_synchronization_file("inception_sync", std::chrono::seconds(20));
    // Finally, this line begins training.  By default, it runs SGD with our specified
    // learning rate until the loss stops decreasing.  Then it reduces the learning rate by
    // a factor of 10 and continues running until the loss stops decreasing again.  It will
    // keep doing this until the learning rate has dropped below the min learning rate
    // defined above or the maximum number of epochs has been executed (it defaults to 10000).
    trainer.train(training_images, training_labels);

    // At this point our net object should have learned how to classify MNIST images.  But
    // before we try it out let's save it to disk.  Note that, since the trainer has been
    // running images through the network, net will have a bunch of state in it related to
    // the last batch of images it processed (e.g. outputs from each layer).  Since we
    // don't care about saving that kind of stuff to disk we can tell the network to forget
    // about that kind of transient data so that our file will be smaller.  We do this by
    // "cleaning" the network before saving it.
    net.clean();
    serialize("mnist_network_inception.dat") << net;
    // Now if we later wanted to recall the network from disk we can simply say:
    // deserialize("mnist_network.dat") >> net;


    // Now let's run the training images through the network.  This statement runs all the
    // images through it and asks the loss layer to convert the network's raw output into
    // labels.  In our case, these labels are the numbers between 0 and 9.
    std::vector<unsigned long> predicted_labels = net(training_images);
    int num_right = 0;
    int num_wrong = 0;
    // And then let's see if it classified them correctly.
    for (size_t i = 0; i < training_images.size(); ++i)
    {
        if (predicted_labels[i] == training_labels[i])
            ++num_right;
        else
            ++num_wrong;
        
    }
    cout << "training num_right: " << num_right << endl;
    cout << "training num_wrong: " << num_wrong << endl;
    cout << "training accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;

    // Let's also see if the network can correctly classify the testing images.  Since
    // MNIST is an easy dataset, we should see at least 99% accuracy.
    predicted_labels = net(testing_images);
    num_right = 0;
    num_wrong = 0;
    for (size_t i = 0; i < testing_images.size(); ++i)
    {
        if (predicted_labels[i] == testing_labels[i])
            ++num_right;
        else
            ++num_wrong;
        
    }
    cout << "testing num_right: " << num_right << endl;
    cout << "testing num_wrong: " << num_wrong << endl;
    cout << "testing accuracy:  " << num_right/(double)(num_right+num_wrong) << endl;

}
catch(std::exception& e)
{
    cout << e.what() << endl;
}