Commit 14b1bc9c authored by Francisco Massa, committed by Soumith Chintala

Add better docs for FasterRCNN, MaskRCNN and KeypointRCNN (#943)

parent 05bc2559
@@ -99,7 +99,41 @@ class FasterRCNN(GeneralizedRCNN):
     Example::

-        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import FasterRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # FasterRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280,
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define which feature maps we will use to perform
+        >>> # the region of interest cropping, as well as the size of
+        >>> # the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> # put the pieces together inside a FasterRCNN model
+        >>> model = FasterRCNN(backbone,
+        >>>                    num_classes=2,
+        >>>                    rpn_anchor_generator=anchor_generator,
+        >>>                    box_roi_pool=roi_pooler)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
...
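The example above runs the assembled model in eval mode; during training the same model takes (images, targets) and returns a dict of losses instead of detections. A minimal sketch, assuming the FasterRCNN `model` built in the example above and purely illustrative target values:

    import torch

    # train mode: the model expects ground-truth targets alongside the images
    model.train()
    images = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    targets = [
        # one illustrative ground-truth box per image,
        # in (x1, y1, x2, y2) format, with an integer class label
        {'boxes': torch.tensor([[50.0, 50.0, 150.0, 150.0]]),
         'labels': torch.tensor([1])}
        for _ in images
    ]
    loss_dict = model(images, targets)   # dict of scalar losses
    sum(loss_dict.values()).backward()   # combine and backprop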
@@ -101,7 +101,47 @@ class KeypointRCNN(FasterRCNN):
     Example::

-        >>> model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import KeypointRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>>
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # KeypointRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280,
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define which feature maps we will use to perform
+        >>> # the region of interest cropping, as well as the size of
+        >>> # the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                          output_size=14,
+        >>>                                                          sampling_ratio=2)
+        >>> # put the pieces together inside a KeypointRCNN model
+        >>> model = KeypointRCNN(backbone,
+        >>>                      num_classes=2,
+        >>>                      rpn_anchor_generator=anchor_generator,
+        >>>                      box_roi_pool=roi_pooler,
+        >>>                      keypoint_roi_pool=keypoint_roi_pooler)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
...
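In eval mode, each prediction dict from KeypointRCNN carries a 'keypoints' entry in addition to 'boxes', 'labels' and 'scores'. A short sketch of reading it, assuming the `model` built in the example above; the [N, K, 3] layout stores (x, y, visibility) per keypoint, with K defaulting to 17 (the COCO person keypoints):

    import torch

    model.eval()
    with torch.no_grad():
        predictions = model([torch.rand(3, 300, 400)])

    keypoints = predictions[0]['keypoints']      # FloatTensor[N, K, 3]
    for instance in keypoints:
        visible = instance[instance[:, 2] > 0]   # keep visible keypoints
        print(visible[:, :2])                    # their (x, y) coordinates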
@@ -104,7 +104,46 @@ class MaskRCNN(FasterRCNN):
     Example::

-        >>> model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
+        >>> import torchvision
+        >>> from torchvision.models.detection import MaskRCNN
+        >>> from torchvision.models.detection.rpn import AnchorGenerator
+        >>>
+        >>> # load a pre-trained model for classification and return
+        >>> # only the features
+        >>> backbone = torchvision.models.mobilenet_v2(pretrained=True).features
+        >>> # MaskRCNN needs to know the number of
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280,
+        >>> # so we need to add it here
+        >>> backbone.out_channels = 1280
+        >>>
+        >>> # let's make the RPN generate 5 x 3 anchors per spatial
+        >>> # location, with 5 different sizes and 3 different aspect
+        >>> # ratios. We have a Tuple[Tuple[int]] because each feature
+        >>> # map could potentially have different sizes and
+        >>> # aspect ratios
+        >>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
+        >>>                                    aspect_ratios=((0.5, 1.0, 2.0),))
+        >>>
+        >>> # let's define which feature maps we will use to perform
+        >>> # the region of interest cropping, as well as the size of
+        >>> # the crop after rescaling.
+        >>> # if your backbone returns a Tensor, featmap_names is expected to
+        >>> # be [0]. More generally, the backbone should return an
+        >>> # OrderedDict[Tensor], and in featmap_names you can choose which
+        >>> # feature maps to use.
+        >>> roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                 output_size=7,
+        >>>                                                 sampling_ratio=2)
+        >>>
+        >>> mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
+        >>>                                                      output_size=14,
+        >>>                                                      sampling_ratio=2)
+        >>> # put the pieces together inside a MaskRCNN model
+        >>> model = MaskRCNN(backbone,
+        >>>                  num_classes=2,
+        >>>                  rpn_anchor_generator=anchor_generator,
+        >>>                  box_roi_pool=roi_pooler,
+        >>>                  mask_roi_pool=mask_roi_pooler)
         >>> model.eval()
         >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
         >>> predictions = model(x)
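MaskRCNN likewise adds a 'masks' entry to each prediction dict: per-instance soft masks of shape [N, 1, H, W] with values in [0, 1]. A sketch of binarizing them, assuming the `model` built in the example above; the 0.5 and 0.7 thresholds are conventional choices, not mandated by the API:

    import torch

    model.eval()
    with torch.no_grad():
        predictions = model([torch.rand(3, 300, 400)])

    masks = predictions[0]['masks']          # FloatTensor[N, 1, H, W] in [0, 1]
    binary_masks = masks > 0.5               # threshold per-pixel probabilities
    keep = predictions[0]['scores'] > 0.7    # drop low-confidence detections
    print(binary_masks[keep].shape)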
@@ -149,8 +188,10 @@ class MaskRCNN(FasterRCNN):
             mask_head = MaskRCNNHeads(out_channels, mask_layers, mask_dilation)
         if mask_predictor is None:
-            mask_dim_reduced = 256  # == mask_layers[-1]
-            mask_predictor = MaskRCNNPredictor(out_channels, mask_dim_reduced, num_classes)
+            mask_predictor_in_channels = 256  # == mask_layers[-1]
+            mask_dim_reduced = 256
+            mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
+                                               mask_dim_reduced, num_classes)
         super(MaskRCNN, self).__init__(
             backbone, num_classes,
...
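The rename in this last hunk separates two quantities that merely happen to both equal 256: the channel count the predictor consumes (mask_predictor_in_channels, the mask head's last layer width, mask_layers[-1]) and the reduced dimension used inside the predictor (mask_dim_reduced). Previously the backbone's out_channels was passed as the predictor's input width, which only worked because the two coincided. Roughly, the default predictor has the following shape; this is a sketch of the standard Mask R-CNN head structure, not code taken from this diff:

    import torch.nn as nn

    num_classes = 2                   # illustrative value, matching the examples
    mask_predictor_in_channels = 256  # == mask_layers[-1], output of the mask head
    mask_dim_reduced = 256            # internal width of the predictor

    # upsample the mask head features 2x, then emit one logit map per class
    sketch_predictor = nn.Sequential(
        nn.ConvTranspose2d(mask_predictor_in_channels, mask_dim_reduced,
                           kernel_size=2, stride=2),
        nn.ReLU(inplace=True),
        nn.Conv2d(mask_dim_reduced, num_classes, kernel_size=1),
    )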