Cleaned up code and comments.

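In particular, the recently added clustering and face-chip-saving functions don't need to be inside the face recognition class, so I moved them out into free functions (`chinese_whispers_clustering`, `save_face_chip`, and `save_face_chips`). I also fixed many incorrect copy/pasted comments and clarified parts of the example code.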

Cleaned up code and comments.
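In particular, the recently added clustering and face-chip-saving functions don't need to be inside the face recognition class, so I moved them out into free functions (`chinese_whispers_clustering`, `save_face_chip`, and `save_face_chips`). I also fixed many incorrect copy/pasted comments and clarified parts of the example code.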
53255262 · Davis King · 5cf80dda · 53255262 · 53255262
Commit 53255262 authored Sep 16, 2017 by Davis King
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 95 additions and 111 deletions

python_examples/face_clustering.py python_examples/face_clustering.py +13 -29

tools/python/src/face_recognition.cpp tools/python/src/face_recognition.cpp +82 -82

No files found.
--- a/python_examples/face_clustering.py
+++ b/python_examples/face_clustering.py
@@ -42,9 +42,9 @@ from skimage import io
 if len(sys.argv) != 5:
    print(
        "Call this program like this:\n"
-        "   ./face_clustering.py shape_predictor_68_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
+        "   ./face_clustering.py shape_predictor_5_face_landmarks.dat dlib_face_recognition_resnet_model_v1.dat ../examples/faces output_folder\n"
        "You can download a trained facial shape predictor and recognition model from:\n"
-        "    http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2\n"
+        "    http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2\n"
        "    http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2")
    exit()
@@ -63,7 +63,7 @@ facerec = dlib.face_recognition_model_v1(face_rec_model_path)
 descriptors = []
 images = []
-# Now process all the images
+# Now find all the faces and compute 128D face descriptors for each face.
 for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
    print("Processing file: {}".format(f))
    img = io.imread(f)
@@ -78,34 +78,17 @@ for f in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
    for k, d in enumerate(dets):
        # Get the landmarks/parts for the face in box d.
        shape = sp(img, d)
-        # Draw the face landmarks on the screen so we can see what face is currently being processed.
        # Compute the 128D vector that describes the face in img identified by
-        # shape.  In general, if two face descriptor vectors have a Euclidean
+        # shape.  
-        # distance between them less than 0.6 then they are from the same
-        # person, otherwise they are from different people. Here we just print
-        # the vector to the screen.
        face_descriptor = facerec.compute_face_descriptor(img, shape)
        descriptors.append(face_descriptor)
        images.append((img, shape))
-        # It should also be noted that you can also call this function like this:
-        #  face_descriptor = facerec.compute_face_descriptor(img, shape, 100)
+# Now let's cluster the faces.  
-        # The version of the call without the 100 gets 99.13% accuracy on LFW
+labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
-        # while the version with 100 gets 99.38%.  However, the 100 makes the
+num_classes = len(set(labels))
-        # call 100x slower to execute, so choose whatever version you like.  To
-        # explain a little, the 3rd argument tells the code how many times to
-        # jitter/resample the image.  When you set it to 100 it executes the
-        # face descriptor extraction 100 times on slightly modified versions of
-        # the face and returns the average result.  You could also pick a more
-        # middle value, such as 10, which is only 10x slower but still gets an
-        # LFW accuracy of 99.3%.
-labels = facerec.cluster(descriptors, 0.5)
-label_classes = list(set(labels))
-label_classes.sort()
-num_classes = len(label_classes)
 print("Number of clusters: {}".format(num_classes))
-print("Labels classes: {}".format(str(label_classes)))
 # Find biggest class
 biggest_class = None
@@ -116,8 +99,8 @@ for i in range(0, num_classes):
        biggest_class_length = class_length
        biggest_class = i
-print("Biggest class: {}".format(biggest_class))
+print("Biggest cluster id number: {}".format(biggest_class))
-print("Biggest class length: {}".format(biggest_class_length))
+print("Number of faces in biggest cluster: {}".format(biggest_class_length))
 # Find the indices for the biggest class
 indices = []
@@ -125,17 +108,18 @@ for i, label in enumerate(labels):
    if label == biggest_class:
        indices.append(i)
-print("Biggest class indices: {}".format(str(indices)))
+print("Indices of images in the biggest cluster: {}".format(str(indices)))
 # Ensure output directory exists
 if not os.path.isdir(output_folder_path):
    os.makedirs(output_folder_path)
 # Save the extracted faces
+print("Saving faces in largest cluster to output folder...")
 for i, index in enumerate(indices):
    img, shape = images[index]
    file_path = os.path.join(output_folder_path, "face_" + str(i))
-    facerec.save_image_chip(img, shape, file_path)
+    dlib.save_face_chip(img, shape, file_path)

--- a/tools/python/src/face_recognition.cpp
+++ b/tools/python/src/face_recognition.cpp
@@ -39,79 +39,6 @@ public:
        cropper->set_max_rotation_degrees(3);
    }
-    boost::python::list cluster(boost::python::list descriptors, float threshold)
-    {
-        boost::python::list clusters;
-        size_t num_descriptors = len(descriptors);
-        // In particular, one simple thing we can do is face clustering.  This next bit of code
-        // creates a graph of connected faces and then uses the Chinese whispers graph clustering
-        // algorithm to identify how many people there are and which faces belong to whom.
-        std::vector<sample_pair> edges;
-        std::vector<unsigned long> labels;
-        for (size_t i = 0; i < num_descriptors; ++i)
-        {
-            for (size_t j = i+1; j < num_descriptors; ++j)
-            {
-                // Faces are connected in the graph if they are close enough.  Here we check if
-                // the distance between two face descriptors is less than 0.6, which is the
-                // decision threshold the network was trained to use.  Although you can
-                // certainly use any other threshold you find useful.
-                matrix<double,0,1> first_descriptor = boost::python::extract<matrix<double,0,1>>(descriptors[i]);
-                matrix<double,0,1> second_descriptor = boost::python::extract<matrix<double,0,1>>(descriptors[j]);
-                if (length(first_descriptor-second_descriptor) < threshold)
-                    edges.push_back(sample_pair(i,j));
-            }
-        }
-        const auto num_clusters = chinese_whispers(edges, labels);
-        for (size_t i = 0; i < labels.size(); ++i)
-        {
-            clusters.append(labels[i]);
-        }
-        return clusters;
-    }
-    void save_image_chip (
-        object img,
-        const full_object_detection& face,
-        const std::string& chip_filename
-    )
-    {
-        std::vector<full_object_detection> faces(1, face);
-        save_image_chips(img, faces, chip_filename);
-        return;
-    }
-    void save_image_chips (
-        object img,
-        const std::vector<full_object_detection>& faces,
-        const std::string& chip_filename
-    )
-    {
-        int num_faces = faces.size();
-        std::vector<chip_details> dets;
-        for (auto& f : faces)
-            dets.push_back(get_face_chip_details(f, 150, 0.25));
-        dlib::array<matrix<rgb_pixel>> face_chips;
-        extract_image_chips(numpy_rgb_image(img), dets, face_chips);
-        int i=0;
-        for (auto& chip : face_chips) {
-            i++;
-            if(num_faces > 1) 
-            {
-                const std::string& file_name = chip_filename + "_" + std::to_string(i) + ".jpg";
-                save_jpeg(chip, file_name);
-            }
-            else
-            {
-                const std::string& file_name = chip_filename + ".jpg";
-                save_jpeg(chip, file_name);
-            }
-        }
-    }
    matrix<double,0,1> compute_face_descriptor (
        object img,
        const full_object_detection& face,
@@ -215,6 +142,78 @@ private:
    anet_type net;
 };
+// ----------------------------------------------------------------------------------------
+boost::python::list chinese_whispers_clustering(boost::python::list descriptors, float threshold)
+{
+    boost::python::list clusters;
+    size_t num_descriptors = len(descriptors);
+    // This next bit of code creates a graph of connected objects and then uses the Chinese
+    // whispers graph clustering algorithm to identify how many objects there are and which
+    // objects belong to which cluster.
+    std::vector<sample_pair> edges;
+    std::vector<unsigned long> labels;
+    for (size_t i = 0; i < num_descriptors; ++i)
+    {
+        for (size_t j = i+1; j < num_descriptors; ++j)
+        {
+            matrix<double,0,1>& first_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[i]);
+            matrix<double,0,1>& second_descriptor = boost::python::extract<matrix<double,0,1>&>(descriptors[j]);
+            if (length(first_descriptor-second_descriptor) < threshold)
+                edges.push_back(sample_pair(i,j));
+        }
+    }
+    const auto num_clusters = chinese_whispers(edges, labels);
+    for (size_t i = 0; i < labels.size(); ++i)
+    {
+        clusters.append(labels[i]);
+    }
+    return clusters;
+}
+void save_face_chips (
+    object img,
+    const std::vector<full_object_detection>& faces,
+    const std::string& chip_filename
+)
+{
+    int num_faces = faces.size();
+    std::vector<chip_details> dets;
+    for (auto& f : faces)
+        dets.push_back(get_face_chip_details(f, 150, 0.25));
+    dlib::array<matrix<rgb_pixel>> face_chips;
+    extract_image_chips(numpy_rgb_image(img), dets, face_chips);
+    int i=0;
+    for (auto& chip : face_chips) 
+    {
+        i++;
+        if(num_faces > 1) 
+        {
+            const std::string& file_name = chip_filename + "_" + std::to_string(i) + ".jpg";
+            save_jpeg(chip, file_name);
+        }
+        else
+        {
+            const std::string& file_name = chip_filename + ".jpg";
+            save_jpeg(chip, file_name);
+        }
+    }
+}
+void save_face_chip (
+    object img,
+    const full_object_detection& face,
+    const std::string& chip_filename
+)
+{
+    std::vector<full_object_detection> faces(1, face);
+    save_face_chips(img, faces, chip_filename);
+    return;
+}
 // ----------------------------------------------------------------------------------------
@@ -230,18 +229,19 @@ void bind_face_recognition()
        .def("compute_face_descriptor", &face_recognition_model_v1::compute_face_descriptors, (arg("img"),arg("faces"),arg("num_jitters")=0),
            "Takes an image and an array of full_object_detections that reference faces in that image and converts them into 128D face descriptors.  "
            "If num_jitters>1 then each face will be randomly jittered slightly num_jitters times, each run through the 128D projection, and the average used as the face descriptor."
-            )
-        .def("save_image_chip", &face_recognition_model_v1::save_image_chip, (arg("img"),arg("face"),arg("chip_filename")),
-            "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix"
-            )
-        .def("save_image_chips", &face_recognition_model_v1::save_image_chips, (arg("img"),arg("faces"),arg("chip_filename")),
-            "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix"
-            )
-        .def("cluster", &face_recognition_model_v1::cluster, (arg("descriptors"), arg("threshold")),
-            "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using chinese_whispers."
            );
    }
+    def("save_face_chip", &save_face_chip, (arg("img"),arg("face"),arg("chip_filename")),
+        "Takes an image and a full_object_detection that references a face in that image and saves the face with the specified file name prefix.  The face will be rotated upright and scaled to 150x150 pixels."
+        );
+    def("save_face_chips", &save_face_chips, (arg("img"),arg("faces"),arg("chip_filename")),
+        "Takes an image and a full_object_detections object that reference faces in that image and saves the faces with the specified file name prefix.  The faces will be rotated upright and scaled to 150x150 pixels."
+        );
+    def("chinese_whispers_clustering", &chinese_whispers_clustering, (arg("descriptors"), arg("threshold")),
+        "Takes a list of descriptors and returns a list that contains a label for each descriptor. Clustering is done using dlib::chinese_whispers."
+        );
    {
    typedef std::vector<full_object_detection> type;
    class_<type>("full_object_detections", "An array of full_object_detection objects.")