Improve speed/accuracy of FasterRCNN by introducing a score threshold on RPN (#3205)

* Introduce small score threshold on RPN.
* Adding docs and fixing keypoint and mask.
* Making value 0.0 by default for BC.
* Fixing for ONNX.
* Update threshold.
* Removing non-default threshold from reference scripts.

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>

Improve speed/accuracy of FasterRCNN by introducing a score threshold on RPN (#3205)
* Introduce small score threshold on RPN.
* Adding docs and fixing keypoint and mask.
* Making value 0.0 by default for BC.
* Fixing for ONNX.
* Update threshold.
* Removing non-default threshold from reference scripts.

Co-authored-by: Francisco Massa <fvsmassa@gmail.com>
8ebfd2f5 · Vasilis Vryniotis · GitHub · d0063f3d · 8ebfd2f5 · 8ebfd2f5
Unverified Commit 8ebfd2f5 authored Jan 14, 2021 by Vasilis Vryniotis Committed by GitHub Jan 14, 2021
8 changed files
--- a/references/detection/train.py
+++ b/references/detection/train.py
@@ -92,8 +92,11 @@ def main(args):
        collate_fn=utils.collate_fn)
    print("Creating model")
-    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes,
+    kwargs = {}
-                                                              pretrained=args.pretrained)
+    if "rcnn" in args.model:
+        kwargs["rpn_score_thresh"] = 0.0
+    model = torchvision.models.detection.__dict__[args.model](num_classes=num_classes, pretrained=args.pretrained,
+                                                              **kwargs)
    model.to(device)
    model_without_ddp = model

--- a/test/test_models_detection_negative_samples.py
+++ b/test/test_models_detection_negative_samples.py
@@ -44,7 +44,7 @@ class Tester(unittest.TestCase):
            rpn_anchor_generator, rpn_head,
            0.5, 0.3,
            256, 0.5,
-            2000, 2000, 0.7)
+            2000, 2000, 0.7, 0.05)
        labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets)

--- a/test/test_onnx.py
+++ b/test/test_onnx.py
+from common_utils import set_rng_seed
 import io
 import torch
 from torchvision import ops
@@ -197,12 +198,14 @@ class ONNXExporterTester(unittest.TestCase):
        rpn_pre_nms_top_n = dict(training=2000, testing=1000)
        rpn_post_nms_top_n = dict(training=2000, testing=1000)
        rpn_nms_thresh = 0.7
+        rpn_score_thresh = 0.0
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
-            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
+            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
+            score_thresh=rpn_score_thresh)
        return rpn
    def _init_test_roi_heads_faster_rcnn(self):
@@ -255,6 +258,8 @@ class ONNXExporterTester(unittest.TestCase):
        return features
    def test_rpn(self):
+        set_rng_seed(0)
        class RPNModule(torch.nn.Module):
            def __init__(self_module):
                super(RPNModule, self_module).__init__()

--- a/torchvision/models/detection/_utils.py
+++ b/torchvision/models/detection/_utils.py
@@ -173,10 +173,14 @@ class BoxCoder(object):
        box_sum = 0
        for val in boxes_per_image:
            box_sum += val
+        if box_sum > 0:
+            rel_codes = rel_codes.reshape(box_sum, -1)
        pred_boxes = self.decode_single(
-            rel_codes.reshape(box_sum, -1), concat_boxes
+            rel_codes, concat_boxes
        )
-        return pred_boxes.reshape(box_sum, -1, 4)
+        if box_sum > 0:
+            pred_boxes = pred_boxes.reshape(box_sum, -1, 4)
+        return pred_boxes
    def decode_single(self, rel_codes, boxes):
        """

--- a/torchvision/models/detection/faster_rcnn.py
+++ b/torchvision/models/detection/faster_rcnn.py
@@ -79,6 +79,8 @@ class FasterRCNN(GeneralizedRCNN):
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
+        rpn_score_thresh (float): during inference, only return proposals with a classification score
+            greater than or equal to rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
@@ -153,6 +155,7 @@ class FasterRCNN(GeneralizedRCNN):
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+                 rpn_score_thresh=0.0,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
@@ -197,7 +200,8 @@ class FasterRCNN(GeneralizedRCNN):
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
-            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
+            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
+            score_thresh=rpn_score_thresh)
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(

--- a/torchvision/models/detection/keypoint_rcnn.py
+++ b/torchvision/models/detection/keypoint_rcnn.py
@@ -74,6 +74,8 @@ class KeypointRCNN(FasterRCNN):
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
+        rpn_score_thresh (float): during inference, only return proposals with a classification score
+            greater than or equal to rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
@@ -158,6 +160,7 @@ class KeypointRCNN(FasterRCNN):
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+                 rpn_score_thresh=0.0,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
@@ -204,6 +207,7 @@ class KeypointRCNN(FasterRCNN):
            rpn_nms_thresh,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
+            rpn_score_thresh,
            # Box parameters
            box_roi_pool, box_head, box_predictor,
            box_score_thresh, box_nms_thresh, box_detections_per_img,

--- a/torchvision/models/detection/mask_rcnn.py
+++ b/torchvision/models/detection/mask_rcnn.py
@@ -75,6 +75,8 @@ class MaskRCNN(FasterRCNN):
            for computing the loss
        rpn_positive_fraction (float): proportion of positive anchors in a mini-batch during training
            of the RPN
+        rpn_score_thresh (float): during inference, only return proposals with a classification score
+            greater than or equal to rpn_score_thresh
        box_roi_pool (MultiScaleRoIAlign): the module which crops and resizes the feature maps in
            the locations indicated by the bounding boxes
        box_head (nn.Module): module that takes the cropped feature maps as input
@@ -158,6 +160,7 @@ class MaskRCNN(FasterRCNN):
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
+                 rpn_score_thresh=0.0,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
@@ -204,6 +207,7 @@ class MaskRCNN(FasterRCNN):
            rpn_nms_thresh,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
+            rpn_score_thresh,
            # Box parameters
            box_roi_pool, box_head, box_predictor,
            box_score_thresh, box_nms_thresh, box_detections_per_img,

--- a/torchvision/models/detection/rpn.py
+++ b/torchvision/models/detection/rpn.py
@@ -141,7 +141,7 @@ class RegionProposalNetwork(torch.nn.Module):
                 fg_iou_thresh, bg_iou_thresh,
                 batch_size_per_image, positive_fraction,
                 #
-                 pre_nms_top_n, post_nms_top_n, nms_thresh):
+                 pre_nms_top_n, post_nms_top_n, nms_thresh, score_thresh=0.0):
        super(RegionProposalNetwork, self).__init__()
        self.anchor_generator = anchor_generator
        self.head = head
@@ -163,6 +163,7 @@ class RegionProposalNetwork(torch.nn.Module):
        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n
        self.nms_thresh = nms_thresh
+        self.score_thresh = score_thresh
        self.min_size = 1e-3
    def pre_nms_top_n(self):
@@ -251,17 +252,29 @@ class RegionProposalNetwork(torch.nn.Module):
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]
+        objectness_prob = F.sigmoid(objectness)
        final_boxes = []
        final_scores = []
-        for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels, image_shapes):
+        for boxes, scores, lvl, img_shape in zip(proposals, objectness_prob, levels, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
+            # remove small boxes
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
+            # remove low scoring boxes
+            # use >= (not >) for backwards compatibility with the default threshold of 0.0
+            keep = torch.where(scores >= self.score_thresh)[0]
+            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]
            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores