// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*

    This example shows how you can use the dlib machine learning tools to make
    an object tracker.  Depending on your tracking application there can be a
    lot of components to a tracker.  However, a central element of many trackers
    is the "detection to track" association step and this is the part of the
    tracker we discuss in this example.  Therefore, in the code below we define
    simple detection and track structures and then go through the steps needed
    to learn, using training data, how to best associate detections to tracks.  

    It should be noted that these tools are implemented essentially as wrappers
    around the more general assignment learning tools present in dlib.  So if
    you want to get an idea of how they work under the covers you should read
    the assignment_learning_ex.cpp example program and its supporting
    documentation.  However, to just use the learning-to-track tools you won't
    need to understand these implementation details.
*/


#include <iostream>
#include <dlib/svm_threaded.h>
#include <dlib/rand.h>

using namespace std;
using namespace dlib;

// ----------------------------------------------------------------------------------------

struct detection 
30
{
Davis King's avatar
Davis King committed
31
32
33
34
35
36
37
38
    /*
        When you use these tools you need to define two structures.  One represents a
        detection and another a track.  In this example we call these structures detection
        and track but you can name them however you like.  Moreover, You can put anything
        you want in your detection structure.  The only requirement is that detection be
        copyable and contain a public typedef named track_type that tells us the track type
        meant for use with this detection object.
    */
39
40
    typedef class track track_type;

Davis King's avatar
Davis King committed
41
42
43
44
45
46
47
48
49
    
    
    // Again, note that this field is NOT REQUIRED by the dlib tools.  You can put whatever
    // you want in your detection object.  Here we are including a column vector of
    // measurements from the sensor that generated the detection.  In this example we don't
    // have a real sensor so we will simulate a very basic one using a random number
    // generator.   But the idea is that you should be able to use the contents of your
    // detection to somehow tell which track it goes with.  So these numbers should contain
    // some identifying information about the real world object that caused this detection.  
50
51
52
    matrix<double,0,1> measurements;
};

Davis King's avatar
Davis King committed
53
54

struct track
55
{
Davis King's avatar
Davis King committed
56
57
58
59
60
61
62
63
64
65
66
67
68
    /*
        Here we define our corresponding track object.  This object has more requirements
        than the detection.  In particular, the dlib machine learning tools require it to
        have the following elements:
            - A typedef named feature_vector_type
            - It should be copyable and default constructable
            - The three functions: get_similarity_features(), update_track(), and propagate_track()
    
        Just like the detection object, you can also add any additional fields you like.
        In this example we keep it simple and say that a track maintains only a copy of the
        most recent sensor measurements it has seen and also a number telling us how long
        it has been since the track was updated with a detection.
    */
69
70

    // This type should be a dlib::matrix capable of storing column vectors or an
Davis King's avatar
Davis King committed
71
    // unsorted sparse vector type such as std::vector<std::pair<unsigned long,double>>.
72
73
74
75
76
77
78
    typedef matrix<double,0,1> feature_vector_type;

    track()
    {
        time_since_last_association = 0;
    }

Davis King's avatar
Davis King committed
79
    void get_similarity_features(const detection& det, feature_vector_type& feats) const
80
    {
Davis King's avatar
Davis King committed
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
        /*
            The get_similarity_features() function takes a detection and outputs a feature
            vector that tells the machine learning tools how "similar" the detection is to
            the track.  The idea here is to output a set of numbers (i.e. the contents of
            feats) that can be used to decide if det should be associated with this track.
            In this example we output the difference between the last sensor measurements
            for this track and the detection's measurements.  This works since we expect
            the sensor measurements to be relatively constant for each track because that's
            how our simple sensor simulator in this example works.  However, in a real
            world application it's likely to be much more complex.  But here we keep things
            simple.

            It should also be noted that get_similarity_features() must always output
            feature vectors with the same number of dimensions.  Finally, the machine
            learning tools are going to learn a linear function of feats and use that to
            predict if det should associate to this track.  So try and define features that
            you think would work in a linear function.  There are all kinds of ways to do
            this.  If you want to get really clever about it you can even use kernel
            methods like the empirical_kernel_map (see empirical_kernel_map_ex.cpp).  I
            would start out with something simple first though.
        */
102
103
104
        feats = abs(last_measurements - det.measurements);
    }

Davis King's avatar
Davis King committed
105
    void update_track(const detection& det)
106
    {
Davis King's avatar
Davis King committed
107
108
109
110
111
112
113
        /*
            This function is called when the dlib tools have decided that det should be
            associated with this track.  So the point of update_track() is to, as the name
            suggests, update the track with the given detection.  In general, you can do
            whatever you want in this function.  Here we simply record the last measurement
            state and reset the time since last association.
        */
114
115
116
117
        last_measurements = det.measurements;
        time_since_last_association = 0;
    }

Davis King's avatar
Davis King committed
118
    void propagate_track()
119
    {
Davis King's avatar
Davis King committed
120
121
122
123
124
125
126
127
128
        /*
            This function is called when the dlib tools have decided, for the current time
            step, that none of the available detections associate with this track.  So the
            point of this function is to perform a track update without a detection.  To
            say that another way.  Every time you ask the dlib tools to perform detection
            to track association they will update each track by calling either
            update_track() or propagate_track().  Which function they call depends on
            whether or not a detection was associated to the track.
        */
129
130
131
132
133
134
135
136
137
        ++time_since_last_association;
    }

    matrix<double,0,1> last_measurements;
    unsigned long time_since_last_association;
};

// ----------------------------------------------------------------------------------------

/*
    Now that we have defined our detection and track structures we are going to define our
    sensor simulator.  In it we will imagine that there are num_objects things in the world
    and those things generate detections from our sensor.  Moreover, each detection from
    the sensor comes with a measurement vector with num_properties elements.  

    So the first function, initialize_object_properties(), just randomly generates the
    property vectors of the num_objects objects and saves them in a global variable.  Then
    when we are generating detections we will output copies of these objects that have been
    corrupted by a little bit of random noise.
*/

// Random number generator used by the simulator functions below.
dlib::rand rnd;
// Number of distinct objects that exist in the simulated world.
const long num_objects = 4;
// Length of the measurement vector the simulated sensor reports for each detection.
const long num_properties = 6;
// object_properties[i] holds the property vector of object i.  It is filled in by
// initialize_object_properties().
std::vector<matrix<double,0,1> > object_properties(num_objects);

void initialize_object_properties()
{
    for (unsigned long i = 0; i < object_properties.size(); ++i)
        object_properties[i] = randm(num_properties,1,rnd);
}

// So here is our function that samples a detection from our simulated sensor.  You tell it
// which object you want to sample a detection from and it returns a detection from that
// object.
detection sample_detection_from_sensor(unsigned long object_id)
{
    // Check the index against the container that is actually indexed below.  This also
    // avoids comparing the unsigned object_id with the signed num_objects constant.
    DLIB_CASSERT(object_id < object_properties.size(), 
        "You can't ask to sample a detection from an object that doesn't exist."); 
    detection temp;
    // Set the measurements equal to the object's true property values plus a little bit of
    // noise.
    temp.measurements = object_properties[object_id] + randm(num_properties,1,rnd)*0.1;
    return temp;
}

// ----------------------------------------------------------------------------------------

// All the labeled detections produced by the sensor during a single time step.
typedef std::vector<labeled_detection<detection> > detections_at_single_time_step;
// A sequence of time steps, i.e. one complete labeled track history.
typedef std::vector<detections_at_single_time_step> track_history;

track_history make_random_tracking_data_for_training()
{
    /*
        Since we are using machine learning we need some training data.  This function
        samples data from our sensor and creates labeled track histories.  In these track
        histories, each detection is labeled with its true track ID.  The goal of the
        machine learning tools will then be to learn to associate all the detections with
        the same ID to the same track object.
    */

    track_history data;

    // At each time step we get a set of detections from the objects in the world.
    // Simulate 100 time steps worth of data where there are 3 objects present. 
    const int num_time_steps = 100;
    // Two phases of num_time_steps steps each are generated below, so make room for
    // all of them up front.
    data.reserve(2*num_time_steps);
    for (int i = 0; i < num_time_steps; ++i)
    {
        detections_at_single_time_step dets(3);
        // sample a detection from object 0
        dets[0].det = sample_detection_from_sensor(0);
        dets[0].label = 0;

        // sample a detection from object 1
        dets[1].det = sample_detection_from_sensor(1);
        dets[1].label = 1;

        // sample a detection from object 2
        dets[2].det = sample_detection_from_sensor(2);
        dets[2].label = 2;

        data.push_back(dets);
    }

    // Now let's imagine object 1 and 2 are gone but a new object, object 3 has arrived.  
    for (int i = 0; i < num_time_steps; ++i)
    {
        detections_at_single_time_step dets(2);
        // sample a detection from object 0
        dets[0].det = sample_detection_from_sensor(0);
        dets[0].label = 0;

        // sample a detection from object 3
        dets[1].det = sample_detection_from_sensor(3);
        dets[1].label = 3;

        data.push_back(dets);
    }

    return data;
}

// ----------------------------------------------------------------------------------------

std::vector<detection> make_random_detections(unsigned long num_dets)
{
Davis King's avatar
Davis King committed
235
236
237
238
239
240
241
    /*
        Finally, when we test the tracker we learned we will need to sample regular old
        unlabeled detections.  This function helps us do that.
    */
    DLIB_CASSERT(num_dets <= num_objects, 
        "You can't ask for more detections than there are objects in our little simulation."); 

242
243
244
    std::vector<detection> dets(num_dets);
    for (unsigned long i = 0; i < dets.size(); ++i)
    {
Davis King's avatar
Davis King committed
245
        dets[i] = sample_detection_from_sensor(i);
246
247
248
249
250
251
252
253
254
255
256
    }
    return dets;
}

// ----------------------------------------------------------------------------------------

int main()
{
    initialize_object_properties();


Davis King's avatar
Davis King committed
257
258
259
260
    // Get some training data.  Here we sample 5 independent track histories.  In a real
    // world problem you would get this kind of data by, for example, collecting data from
    // your sensor on 5 separate days where you did an independent collection each day.
    // You can train a model with just one track history but the more you have the better.
261
262
263
264
265
266
267
268
269
    std::vector<track_history> data;
    data.push_back(make_random_tracking_data_for_training());
    data.push_back(make_random_tracking_data_for_training());
    data.push_back(make_random_tracking_data_for_training());
    data.push_back(make_random_tracking_data_for_training());
    data.push_back(make_random_tracking_data_for_training());


    structural_track_association_trainer trainer;
Davis King's avatar
Davis King committed
270
271
272
273
274
    // Note that the machine learning tools have a parameter.  This is the usual SVM C
    // parameter that controls the trade-off between trying to fit the training data or
    // producing a "simpler" solution.  You need to try a few different values of this
    // parameter to find out what setting works best for your problem (try values in the
    // range 0.001 to 1000000).
275
    trainer.set_c(100);
Davis King's avatar
Davis King committed
276
    // Now do the training.
277
278
    track_association_function<detection> assoc = trainer.train(data);

Davis King's avatar
Davis King committed
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
    // We can test the accuracy of the learned association function on some track history
    // data.  Here we test it on the data we trained on.  It outputs a single number that
    // measures the fraction of detections which were correctly associated to their tracks.
    // So a value of 1 indicates perfect tracking and a value of 0 indicates totally wrong
    // tracking.
    cout << "Association accuracy on training data: "<< test_track_association_function(assoc, data) << endl;
    // It's very important to test the output of a machine learning method on data it
    // wasn't trained on.  You can do that by calling test_track_association_function() on
    // held out data.  You can also use cross-validation like so:
    cout << "Association accuracy from 5-fold CV:   "<< cross_validate_track_association_trainer(trainer, data, 5) << endl;
    // Unsurprisingly, the testing functions show that the assoc function we learned
    // perfectly associates all detections to tracks in this easy data.




    // OK.  So how do you use this assoc thing?  Let's use it to do some tracking!
296

Davis King's avatar
Davis King committed
297
    // tracks contains all our current tracks.  Initially it is empty.
298
    std::vector<track> tracks;
Davis King's avatar
Davis King committed
299
    cout << "number of tracks: "<< tracks.size() << endl;
300

Davis King's avatar
Davis King committed
301
302
303
304
305
306
307
308
    // Sample detections from 3 objects.
    std::vector<detection> dets = make_random_detections(3);
    // Calling assoc(), the function we just learned, performs the detection to track
    // association.  It will also call each track's update_track() function with the
    // associated detection.  For tracks that don't get a detection, it calls
    // propagate_track(). 
    assoc(tracks, dets);
    // Now there are 3 things in tracks.
309
    cout << "number of tracks: "<< tracks.size() << endl;
Davis King's avatar
Davis King committed
310
311

    // Run the tracker for a few more time steps...
312
313
314
315
316
317
318
319
    dets = make_random_detections(3);
    assoc(tracks, dets);
    cout << "number of tracks: "<< tracks.size() << endl;

    dets = make_random_detections(3);
    assoc(tracks, dets);
    cout << "number of tracks: "<< tracks.size() << endl;

Davis King's avatar
Davis King committed
320
    // Now another object has appeared!  There are 4 objects now.
321
322
    dets = make_random_detections(4);
    assoc(tracks, dets);
Davis King's avatar
Davis King committed
323
    // Now there are 4 tracks instead of 3!
324
325
    cout << "number of tracks: "<< tracks.size() << endl;

Davis King's avatar
Davis King committed
326
327
328
    // That 4th object just vanished.  Let's look at the time_since_last_association values
    // for each track.  We will see that one of the tracks isn't getting updated with
    // detections anymore since the object it corresponds to is no longer present.
329
330
331
332
333
334
335
336
337
338
339
340
341
    dets = make_random_detections(3);
    assoc(tracks, dets);
    cout << "number of tracks: "<< tracks.size() << endl;
    for (unsigned long i = 0; i < tracks.size(); ++i)
        cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;

    dets = make_random_detections(3);
    assoc(tracks, dets);
    cout << "number of tracks: "<< tracks.size() << endl;
    for (unsigned long i = 0; i < tracks.size(); ++i)
        cout << "   time since last association: "<< tracks[i].time_since_last_association << endl;


Davis King's avatar
Davis King committed
342
343
344
345
346




    // Finally, you can save your track_association_function to disk like so:
347
348
349
350
    ofstream fout("track_assoc.svm", ios::binary);
    serialize(assoc, fout);
    fout.close();

Davis King's avatar
Davis King committed
351
    // And recall it from disk later like so:
352
353
354
355
356
357
    ifstream fin("track_assoc.svm", ios::binary);
    deserialize(assoc, fin);
}

// ----------------------------------------------------------------------------------------