#!/usr/bin/python
# The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
#
# This example program shows how you can use dlib to make a HOG based object
# detector for things like faces, pedestrians, and any other semi-rigid
# object.  In particular, we go through the steps to train the kind of sliding
# window object detector first published by Dalal and Triggs in 2005 in the
# paper Histograms of Oriented Gradients for Human Detection.
#
#
# COMPILING/INSTALLING THE DLIB PYTHON INTERFACE
#   You can install dlib using the command:
#       pip install dlib
#
#   Alternatively, if you want to compile dlib yourself then go into the dlib
#   root folder and run:
#       python setup.py install
#   or
#       python setup.py install --yes USE_AVX_INSTRUCTIONS
#   if you have a CPU that supports AVX instructions, since this makes some
#   things run faster.
#
#   Compiling dlib should work on any operating system so long as you have
#   CMake installed.  On Ubuntu, this can be done easily by running the
#   command:
#       sudo apt-get install cmake
#
#   Also note that this example requires Numpy which can be installed
#   via the command:
#       pip install numpy

# Standard-library imports first, third-party (dlib) in its own group (PEP 8).
import glob
import os
import sys

import dlib

# In this example we are going to train a face detector based on the small
# faces dataset in the examples/faces directory.  This means you need to supply
# the path to this faces folder as a command line argument so we will know
# where it is.
if len(sys.argv) != 2:
    print(
        "Give the path to the examples/faces directory as the argument to this "
        "program. For example, if you are in the python_examples folder then "
        "execute this program by running:\n"
        "    ./train_object_detector.py ../examples/faces")
    # Use sys.exit with a non-zero status: exit() comes from the site module
    # (not guaranteed in all interpreters) and would report success (0) for
    # what is a usage error.
    sys.exit(1)
faces_folder = sys.argv[1]

# Now let's do the training.  The train_simple_object_detector() function has a
# bunch of options, all of which come with reasonable default values.  The next
# few lines go over some of these options.
options = dlib.simple_object_detector_training_options()
# Since faces are left/right symmetric we can tell the trainer to train a
# symmetric detector.  This helps it get the most value out of the training
# data.
options.add_left_right_image_flips = True
# The trainer is a kind of support vector machine and therefore has the usual
# SVM C parameter.  In general, a bigger C encourages it to fit the training
# data better but might lead to overfitting.  You must find the best C value
# empirically by checking how well the trained detector works on a test set of
# images you haven't trained on.  Don't just leave the value set at 5.  Try a
# few different C values and see what works best for your data.
options.C = 5
# Tell the code how many CPU cores your computer has for the fastest training.
options.num_threads = 4
options.be_verbose = True


training_xml_path = os.path.join(faces_folder, "training.xml")
testing_xml_path = os.path.join(faces_folder, "testing.xml")
# This function does the actual training.  It will save the final detector to
# detector.svm.  The input is an XML file that lists the images in the training
# dataset and also contains the positions of the face boxes.  To create your
# own XML files you can use the imglab tool which can be found in the
# tools/imglab folder.  It is a simple graphical tool for labeling objects in
# images with boxes.  To see how to use it read the tools/imglab/README.txt
# file.  But for this example, we just use the training.xml file included with
# dlib.
dlib.train_simple_object_detector(training_xml_path, "detector.svm", options)


# Now that we have a face detector we can test it.  The first statement tests
# it on the training data.  It will print the precision, recall, and then
# average precision.
print("")  # Print blank line to create gap from previous output
print("Training accuracy: {}".format(
    dlib.test_simple_object_detector(training_xml_path, "detector.svm")))
# However, to get an idea if it really worked without overfitting we need to
# run it on images it wasn't trained on.  The next line does this.  Happily, we
# see that the object detector works perfectly on the testing images.
print("Testing accuracy: {}".format(
    dlib.test_simple_object_detector(testing_xml_path, "detector.svm")))
# Now let's use the detector as you would in a normal application.  First we
# will load it from disk.
detector = dlib.simple_object_detector("detector.svm")

# We can look at the HOG filter we learned.  It should look like a face.  Neat!
win_det = dlib.image_window()
win_det.set_image(detector)

# Now let's run the detector over the images in the faces folder and display the
# results.
print("Showing detections on the images in the faces folder...")
win = dlib.image_window()
for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
    print("Processing file: {}".format(f))
    img = dlib.load_rgb_image(f)
    dets = detector(img)
    print("Number of faces detected: {}".format(len(dets)))
    for k, d in enumerate(dets):
        print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
            k, d.left(), d.top(), d.right(), d.bottom()))

    win.clear_overlay()
    win.set_image(img)
    win.add_overlay(dets)
    dlib.hit_enter_to_continue()
# Next, suppose you have trained multiple detectors and you want to run them
# efficiently as a group.  You can do this as follows:
detector1 = dlib.fhog_object_detector("detector.svm")
# In this example we load detector.svm again since it's the only one we have on
# hand. But in general it would be a different detector.
detector2 = dlib.fhog_object_detector("detector.svm")
# Make a list of all the detectors you want to run.  Here we have 2, but you
# could have any number.
detectors = [detector1, detector2]
# Build the image path portably instead of concatenating with '/'.
image = dlib.load_rgb_image(os.path.join(faces_folder, "2008_002506.jpg"))
# run_multiple() evaluates all the detectors in one pass and returns, for each
# detection, its box, confidence score, and which detector produced it.
boxes, confidences, detector_idxs = dlib.fhog_object_detector.run_multiple(
    detectors, image, upsample_num_times=1, adjust_threshold=0.0)
for box, confidence, idx in zip(boxes, confidences, detector_idxs):
    print("detector {} found box {} with confidence {}.".format(
        idx, box, confidence))

# Finally, note that you don't have to use the XML based input to
# train_simple_object_detector().  If you have already loaded your training
# images and bounding boxes for the objects then you can call it as shown
# below.

# You just need to put your images into a list.
images = [dlib.load_rgb_image(os.path.join(faces_folder, "2008_002506.jpg")),
          dlib.load_rgb_image(os.path.join(faces_folder, "2009_004587.jpg"))]
# Then for each image you make a list of rectangles which give the pixel
# locations of the edges of the boxes.
boxes_img1 = [dlib.rectangle(left=329, top=78, right=437, bottom=186),
              dlib.rectangle(left=224, top=95, right=314, bottom=185),
              dlib.rectangle(left=125, top=65, right=214, bottom=155)]
boxes_img2 = [dlib.rectangle(left=154, top=46, right=228, bottom=121),
              dlib.rectangle(left=266, top=280, right=328, bottom=342)]
# And then you aggregate those lists of boxes into one big list and then call
# train_simple_object_detector().
boxes = [boxes_img1, boxes_img2]

detector2 = dlib.train_simple_object_detector(images, boxes, options)
# We could save this detector to disk by uncommenting the following.
# detector2.save('detector2.svm')

# Now let's look at its HOG filter!
win_det.set_image(detector2)
dlib.hit_enter_to_continue()

# Note that you don't have to use the XML based input to
# test_simple_object_detector().  If you have already loaded your training
# images and bounding boxes for the objects then you can call it as shown
# below.
print("\nTraining accuracy: {}".format(
    dlib.test_simple_object_detector(images, boxes, detector2)))