Unverified Commit d9a69506 authored by Vasilis Vryniotis, committed by GitHub

Update galleries to use Multi-weight idioms (#6030)

* Update the preprocessing description for RAFT.

* Fixing incorrect usage of models.

* Fixing the content of viz utils

* Addressing review comments
parent 10acc822
@@ -81,11 +81,11 @@ img2_batch = torch.stack([frames[101], frames[151]])
plot(img1_batch)
#########################
-# The RAFT model that we will use accepts RGB float images with pixel values in
-# [-1, 1]. The frames we got from :func:`~torchvision.io.read_video` are int
-# images with values in [0, 255], so we will have to pre-process them. We also
-# reduce the image sizes for the example to run faster. Image dimension must be
-# divisible by 8.
+# The RAFT model accepts RGB images. We first get the frames from
+# :func:`~torchvision.io.read_video` and resize them to ensure their
+# dimensions are divisible by 8. Then we use the transforms bundled into the
+# weights in order to preprocess the input and rescale its values to the
+# required ``[-1, 1]`` interval.
from torchvision.models.optical_flow import Raft_Large_Weights
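The new wording describes the multi-weight idiom for RAFT: resizing is still done by hand (dimensions must be divisible by 8), while the float conversion and rescaling to ``[-1, 1]`` come from the transforms bundled with the weights. A minimal sketch of that pipeline, assuming torchvision >= 0.13 and the ``img1_batch``/``img2_batch`` tensors from the surrounding gallery code; the 520x960 target size is just an example size divisible by 8:

import torchvision.transforms.functional as F
from torchvision.models.optical_flow import Raft_Large_Weights

weights = Raft_Large_Weights.DEFAULT
transforms = weights.transforms()

def preprocess(img1_batch, img2_batch):
    # Resize by hand so that height and width are divisible by 8.
    img1_batch = F.resize(img1_batch, size=[520, 960])
    img2_batch = F.resize(img2_batch, size=[520, 960])
    # The bundled preset converts the uint8 frames to float and
    # rescales them into the [-1, 1] range the model expects.
    return transforms(img1_batch, img2_batch)

img1_batch, img2_batch = preprocess(img1_batch, img2_batch)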
@@ -43,8 +43,9 @@ from pathlib import Path
dog1_int = read_image(str(Path('assets') / 'dog1.jpg'))
dog2_int = read_image(str(Path('assets') / 'dog2.jpg'))
dog_list = [dog1_int, dog2_int]
-grid = make_grid([dog1_int, dog2_int, dog1_int, dog2_int])
+grid = make_grid(dog_list)
show(grid)
####################################
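``make_grid`` takes a list of equally-sized image tensors and packs them into a single image tensor; ``nrow`` and ``padding`` control the layout. A small usage sketch, with illustrative parameter values:

from torchvision.utils import make_grid

# Two images per row, with 4 pixels of padding between tiles.
grid = make_grid(dog_list, nrow=2, padding=4)
show(grid)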
@@ -65,28 +66,23 @@ show(result)
#####################################
# Naturally, we can also plot bounding boxes produced by torchvision detection
-# models. Here is demo with a Faster R-CNN model loaded from
+# models. Here is a demo with a Faster R-CNN model loaded from
# :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`
-# model. You can also try using a RetinaNet with
-# :func:`~torchvision.models.detection.retinanet_resnet50_fpn`, an SSDlite with
-# :func:`~torchvision.models.detection.ssdlite320_mobilenet_v3_large` or an SSD with
-# :func:`~torchvision.models.detection.ssd300_vgg16`. For more details
-# on the output of such models, you may refer to :ref:`instance_seg_output`.
+# model. For more details on the output of such models, you may
+# refer to :ref:`instance_seg_output`.
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights
batch_int = torch.stack([dog1_int, dog2_int])
weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()
-batch = transforms(batch_int)
+images = [transforms(d) for d in dog_list]
model = fasterrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()
-outputs = model(batch)
+outputs = model(images)
print(outputs)
#####################################
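The hunk above is the core multi-weight idiom this commit rolls out across the galleries: pick a weights enum, take its bundled inference transforms, and pass detection models a list of 3D tensors rather than a stacked batch. Condensed, and assuming the ``dog_list`` defined earlier:

from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

weights = FasterRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()  # preprocessing that matches the checkpoint

model = fasterrcnn_resnet50_fpn(weights=weights, progress=False).eval()
images = [transforms(d) for d in dog_list]  # detection models accept a list of images
outputs = model(images)  # one dict of boxes/labels/scores per input image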
@@ -96,7 +92,7 @@ print(outputs)
score_threshold = .8
dogs_with_boxes = [
    draw_bounding_boxes(dog_int, boxes=output['boxes'][output['scores'] > score_threshold], width=4)
-    for dog_int, output in zip(batch_int, outputs)
+    for dog_int, output in zip(dog_list, outputs)
]
show(dogs_with_boxes)
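``draw_bounding_boxes`` also accepts a ``labels=`` argument, so the predicted class ids can be rendered as names via ``weights.meta["categories"]``. A sketch building on the detection outputs above; the 0.8 threshold mirrors the gallery code:

from torchvision.utils import draw_bounding_boxes

score_threshold = .8
dogs_with_labelled_boxes = [
    draw_bounding_boxes(
        dog_int,
        boxes=output['boxes'][output['scores'] > score_threshold],
        labels=[weights.meta["categories"][i]
                for i in output['labels'][output['scores'] > score_threshold]],
        width=4,
    )
    for dog_int, output in zip(dog_list, outputs)
]
show(dogs_with_labelled_boxes)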
@@ -114,14 +110,8 @@ show(dogs_with_boxes)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# We will see how to use it with torchvision's FCN Resnet-50, loaded with
-# :func:`~torchvision.models.segmentation.fcn_resnet50`. You can also try using
-# DeepLabv3 (:func:`~torchvision.models.segmentation.deeplabv3_resnet50`) or
-# lraspp mobilenet models
-# (:func:`~torchvision.models.segmentation.lraspp_mobilenet_v3_large`).
-#
-# Let's start by looking at the output of the model. Remember that in general,
-# images must be normalized before they're passed to a semantic segmentation
-# model.
+# :func:`~torchvision.models.segmentation.fcn_resnet50`. Let's start by looking
+# at the output of the model.
from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights
@@ -131,8 +121,8 @@ transforms = weights.transforms(resize_size=None)
model = fcn_resnet50(weights=weights, progress=False)
model = model.eval()
-normalized_batch = transforms(batch)
-output = model(normalized_batch)['out']
+batch = torch.stack([transforms(d) for d in dog_list])
+output = model(batch)['out']
print(output.shape, output.min().item(), output.max().item())
#####################################
@@ -145,18 +135,13 @@ print(output.shape, output.min().item(), output.max().item())
# Let's plot the masks that have been detected for the dog class and for the
# boat class:
-sem_classes = [
-    '__background__', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
-    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
-    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
-]
-sem_class_to_idx = {cls: idx for (idx, cls) in enumerate(sem_classes)}
+sem_class_to_idx = {cls: idx for (idx, cls) in enumerate(weights.meta["categories"])}
normalized_masks = torch.nn.functional.softmax(output, dim=1)
dog_and_boat_masks = [
    normalized_masks[img_idx, sem_class_to_idx[cls]]
-    for img_idx in range(batch.shape[0])
+    for img_idx in range(len(dog_list))
    for cls in ('dog', 'boat')
]
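The ``boolean_dog_masks`` used in the next hunk is not shown in this diff; one plausible derivation from ``normalized_masks`` is to mark the pixels whose argmax class is 'dog':

class_dim = 1  # class scores live in dim 1 of the (N, C, H, W) output
boolean_dog_masks = normalized_masks.argmax(class_dim) == sem_class_to_idx['dog']
print(f"shape = {boolean_dog_masks.shape}, dtype = {boolean_dog_masks.dtype}")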
@@ -195,7 +180,7 @@ from torchvision.utils import draw_segmentation_masks
dogs_with_masks = [
    draw_segmentation_masks(img, masks=mask, alpha=0.7)
-    for img, mask in zip(batch_int, boolean_dog_masks)
+    for img, mask in zip(dog_list, boolean_dog_masks)
]
show(dogs_with_masks)
@@ -241,7 +226,7 @@ all_classes_masks = all_classes_masks.swapaxes(0, 1)
dogs_with_masks = [
    draw_segmentation_masks(img, masks=mask, alpha=.6)
-    for img, mask in zip(batch_int, all_classes_masks)
+    for img, mask in zip(dog_list, all_classes_masks)
]
show(dogs_with_masks)
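Likewise, ``all_classes_masks`` can be produced in one shot by broadcasting the argmax map against every class index; the ``swapaxes(0, 1)`` visible in the hunk header then reorders it to one stack of masks per image. A sketch, reusing ``normalized_masks`` and ``class_dim`` from above:

import torch

num_classes = normalized_masks.shape[1]
winning_class = normalized_masks.argmax(class_dim)  # (N, H, W) map of best class per pixel
# Comparing against a (num_classes, 1, 1, 1) index tensor broadcasts to one
# boolean mask per (class, image) pair, shape (num_classes, N, H, W).
all_classes_masks = winning_class == torch.arange(num_classes)[:, None, None, None]
all_classes_masks = all_classes_masks.swapaxes(0, 1)  # -> (N, num_classes, H, W)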
@@ -272,12 +257,12 @@ from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet5
weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()
-batch = transforms(batch_int)
+images = [transforms(d) for d in dog_list]
model = maskrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()
-output = model(batch)
+output = model(images)
print(output)
#####################################
@@ -304,30 +289,13 @@ print(f"shape = {dog1_masks.shape}, dtype = {dog1_masks.dtype}, "
f"min = {dog1_masks.min()}, max = {dog1_masks.max()}")
#####################################
-# Here the masks corresponds to probabilities indicating, for each pixel, how
+# Here the masks correspond to probabilities indicating, for each pixel, how
# likely it is to belong to the predicted label of that instance. Those
# predicted labels correspond to the 'labels' element in the same output dict.
# Let's see which labels were predicted for the instances of the first image.
-inst_classes = [
-    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
-    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
-    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
-    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
-    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
-    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
-    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
-    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
-    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
-    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
-    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
-    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
-]
-inst_class_to_idx = {cls: idx for (idx, cls) in enumerate(inst_classes)}
print("For the first dog, the following instances were detected:")
-print([inst_classes[label] for label in dog1_output['labels']])
+print([weights.meta["categories"][label] for label in dog1_output['labels']])
#####################################
# Interestingly, the model detects two persons in the image. Let's go ahead and
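The ``boolean_masks`` consumed by the next hunk come from thresholding Mask R-CNN's probability masks; a sketch under assumed cut-offs (score > 0.75 to keep confident instances, probability > 0.5 to binarize each mask):

score_threshold = .75
proba_threshold = 0.5
boolean_masks = [
    out['masks'][out['scores'] > score_threshold] > proba_threshold
    for out in output
]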
@@ -369,7 +337,7 @@ boolean_masks = [
dogs_with_masks = [
    draw_segmentation_masks(img, mask.squeeze(1))
-    for img, mask in zip(batch_int, boolean_masks)
+    for img, mask in zip(dog_list, boolean_masks)
]
show(dogs_with_masks)
@@ -388,8 +356,6 @@ show(dogs_with_masks)
# torchvision's KeypointRCNN loaded with :func:`~torchvision.models.detection.keypointrcnn_resnet50_fpn`.
# We will first have a look at output of the model.
#
-# Note that the keypoint detection model does not need normalized images.
-#
from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
from torchvision.io import read_image
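The keypoint section follows the same multi-weight idiom; a sketch of the setup, assuming a ``person1.jpg`` test asset (the file name is illustrative, not from this diff):

from pathlib import Path

from torchvision.io import read_image
from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights

person_int = read_image(str(Path('assets') / 'person1.jpg'))  # illustrative asset path

weights = KeypointRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()
person_float = transforms(person_int)

model = keypointrcnn_resnet50_fpn(weights=weights, progress=False).eval()
outputs = model([person_float])
print(outputs)  # boxes, labels, scores, keypoints, keypoints_scores per image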