Commit cc26cd81 authored by panning

merge v0.16.0

parents f78f29f5 fbb4cc54
......@@ -31,9 +31,9 @@ class MaskRCNN(FasterRCNN):
The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
image, and should be in 0-1 range. Different images can have different sizes.
The behavior of the model changes depending if it is in training or evaluation mode.
The behavior of the model changes depending on if it is in training or evaluation mode.
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
During training, the model expects both the input tensors and targets (list of dictionary),
containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
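As a quick illustration of the input/target contract described above, here is a minimal, hypothetical training call; the toy image sizes and the single box/mask per image are made up for the example, and the ``masks`` key comes from the portion of the docstring elided in this hunk:

import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn

model = maskrcnn_resnet50_fpn(weights=None, weights_backbone=None, num_classes=2)  # background + 1 class
model.train()
images = [torch.rand(3, 300, 400), torch.rand(3, 480, 640)]  # values in 0-1, sizes may differ
targets = []
for img in images:
    h, w = img.shape[-2:]
    mask = torch.zeros((1, h, w), dtype=torch.uint8)
    mask[0, 20:150, 10:100] = 1  # binary mask matching the box below
    targets.append(
        {
            "boxes": torch.tensor([[10.0, 20.0, 100.0, 150.0]]),  # [x1, y1, x2, y2]
            "labels": torch.tensor([1], dtype=torch.int64),
            "masks": mask,
        }
    )
loss_dict = model(images, targets)  # training mode returns a dict of losses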
......@@ -56,7 +56,7 @@ class MaskRCNN(FasterRCNN):
Args:
backbone (nn.Module): the network used to compute the features for the model.
It should contain a out_channels attribute, which indicates the number of output
It should contain an out_channels attribute, which indicates the number of output
channels that each feature map has (and it should be the same for all feature maps).
The backbone should return a single Tensor or an OrderedDict[Tensor].
num_classes (int): number of output classes of the model (including the background).
......@@ -123,7 +123,7 @@ class MaskRCNN(FasterRCNN):
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # MaskRCNN needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # so we need to add it here
>>> # so we need to add it here,
>>> backbone.out_channels = 1280
>>>
>>> # let's make the RPN generate 5 x 3 anchors per spatial
......@@ -370,6 +370,8 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
"mask_map": 34.6,
}
},
"_ops": 134.38,
"_file_size": 169.84,
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
},
)
......@@ -390,6 +392,8 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
"mask_map": 41.8,
}
},
"_ops": 333.577,
"_file_size": 177.219,
"_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
},
)
......@@ -418,9 +422,9 @@ def maskrcnn_resnet50_fpn(
The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
image, and should be in ``0-1`` range. Different images can have different sizes.
The behavior of the model changes depending if it is in training or evaluation mode.
The behavior of the model changes depending on if it is in training or evaluation mode.
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
During training, the model expects both the input tensors and targets (list of dictionary),
containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
......@@ -497,7 +501,7 @@ def maskrcnn_resnet50_fpn(
model = MaskRCNN(backbone, num_classes=num_classes, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if weights == MaskRCNN_ResNet50_FPN_Weights.COCO_V1:
overwrite_eps(model, 0.0)
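The ``check_hash=True`` added throughout this merge is forwarded down to ``torch.hub.load_state_dict_from_url``, which verifies the hash fragment embedded in the weight file name; roughly, the download step behaves like this sketch (illustrative only, not the exact builder internals):

import torch
from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights

weights = MaskRCNN_ResNet50_FPN_Weights.COCO_V1
# approximately what weights.get_state_dict(progress=True, check_hash=True) does:
state_dict = torch.hub.load_state_dict_from_url(weights.url, progress=True, check_hash=True)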
......@@ -578,17 +582,6 @@ def maskrcnn_resnet50_fpn_v2(
)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
model_urls = _ModelURLs(
{
"maskrcnn_resnet50_fpn_coco": MaskRCNN_ResNet50_FPN_Weights.COCO_V1.url,
}
)
......@@ -327,9 +327,9 @@ class RetinaNet(nn.Module):
The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
image, and should be in 0-1 range. Different images can have different sizes.
The behavior of the model changes depending if it is in training or evaluation mode.
The behavior of the model changes depending on if it is in training or evaluation mode.
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
During training, the model expects both the input tensors and targets (list of dictionary),
containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
......@@ -382,7 +382,7 @@ class RetinaNet(nn.Module):
>>> # only the features
>>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
>>> # RetinaNet needs to know the number of
>>> # output channels in a backbone. For mobilenet_v2, it's 1280
>>> # output channels in a backbone. For mobilenet_v2, it's 1280,
>>> # so we need to add it here
>>> backbone.out_channels = 1280
>>>
......@@ -690,6 +690,8 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
"box_map": 36.4,
}
},
"_ops": 151.54,
"_file_size": 130.267,
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
},
)
......@@ -709,6 +711,8 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum):
"box_map": 41.5,
}
},
"_ops": 152.238,
"_file_size": 146.037,
"_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
},
)
......@@ -739,9 +743,9 @@ def retinanet_resnet50_fpn(
The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
image, and should be in ``0-1`` range. Different images can have different sizes.
The behavior of the model changes depending if it is in training or evaluation mode.
The behavior of the model changes depending on if it is in training or evaluation mode.
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
During training, the model expects both the input tensors and targets (list of dictionary),
containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
......@@ -811,7 +815,7 @@ def retinanet_resnet50_fpn(
model = RetinaNet(backbone, num_classes, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if weights == RetinaNet_ResNet50_FPN_Weights.COCO_V1:
overwrite_eps(model, 0.0)
......@@ -890,17 +894,6 @@ def retinanet_resnet50_fpn_v2(
model = RetinaNet(backbone, num_classes, anchor_generator=anchor_generator, head=head, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
model_urls = _ModelURLs(
{
"retinanet_resnet50_fpn_coco": RetinaNet_ResNet50_FPN_Weights.COCO_V1.url,
}
)
......@@ -315,7 +315,7 @@ def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched
valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
valid = torch.where(valid)[0]
# torch.mean (in binary_cross_entropy_with_logits) does'nt
# torch.mean (in binary_cross_entropy_with_logits) doesn't
# accept empty tensors, so handle it separately
if keypoint_targets.numel() == 0 or len(valid) == 0:
return keypoint_logits.sum() * 0
......@@ -746,7 +746,7 @@ class RoIHeads(nn.Module):
if not t["boxes"].dtype in floating_point_types:
raise TypeError(f"target boxes must of float type, instead got {t['boxes'].dtype}")
if not t["labels"].dtype == torch.int64:
raise TypeError("target labels must of int64 type, instead got {t['labels'].dtype}")
raise TypeError(f"target labels must of int64 type, instead got {t['labels'].dtype}")
if self.has_keypoint():
if not t["keypoints"].dtype == torch.float32:
raise TypeError(f"target keypoints must of float type, instead got {t['keypoints'].dtype}")
......@@ -787,7 +787,7 @@ class RoIHeads(nn.Module):
mask_proposals = [p["boxes"] for p in result]
if self.training:
if matched_idxs is None:
raise ValueError("if in trainning, matched_idxs should not be None")
raise ValueError("if in training, matched_idxs should not be None")
# during training, only focus on positive boxes
num_images = len(proposals)
......
......@@ -39,6 +39,8 @@ class SSD300_VGG16_Weights(WeightsEnum):
"box_map": 25.1,
}
},
"_ops": 34.858,
"_file_size": 135.988,
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
},
)
......@@ -126,12 +128,12 @@ class SSD(nn.Module):
Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.
The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
image, and should be in 0-1 range. Different images can have different sizes but they will be resized
image, and should be in 0-1 range. Different images can have different sizes, but they will be resized
to a fixed size before passing it to the backbone.
The behavior of the model changes depending if it is in training or evaluation mode.
The behavior of the model changes depending on if it is in training or evaluation mode.
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
During training, the model expects both the input tensors and targets (list of dictionary),
containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
......@@ -554,7 +556,7 @@ def _vgg_extractor(backbone: VGG, highres: bool, trainable_layers: int):
stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]
num_stages = len(stage_indices)
# find the index of the layer from which we wont freeze
# find the index of the layer from which we won't freeze
torch._assert(
0 <= trainable_layers <= num_stages,
f"trainable_layers should be in the range [0, {num_stages}]. Instead got {trainable_layers}",
......@@ -588,12 +590,12 @@ def ssd300_vgg16(
.. betastatus:: detection module
The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
image, and should be in 0-1 range. Different images can have different sizes but they will be resized
image, and should be in 0-1 range. Different images can have different sizes, but they will be resized
to a fixed size before passing it to the backbone.
The behavior of the model changes depending if it is in training or evaluation mode.
The behavior of the model changes depending on if it is in training or evaluation mode.
During training, the model expects both the input tensors, as well as a targets (list of dictionary),
During training, the model expects both the input tensors and targets (list of dictionary),
containing:
- boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
......@@ -675,28 +677,6 @@ def ssd300_vgg16(
model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
model_urls = _ModelURLs(
{
"ssd300_vgg16_coco": SSD300_VGG16_Weights.COCO_V1.url,
}
)
backbone_urls = _ModelURLs(
{
# We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses
# the same input standardization method as the paper.
# Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
# Only the `features` weights have proper values, those on the `classifier` module are filled with nans.
"vgg16_features": VGG16_Weights.IMAGENET1K_FEATURES.url,
}
)
......@@ -172,7 +172,7 @@ def _mobilenet_extractor(
stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
num_stages = len(stage_indices)
# find the index of the layer from which we wont freeze
# find the index of the layer from which we won't freeze
if not 0 <= trainable_layers <= num_stages:
raise ValueError("trainable_layers should be in the range [0, {num_stages}], instead got {trainable_layers}")
freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
......@@ -198,6 +198,8 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
"box_map": 21.3,
}
},
"_ops": 0.583,
"_file_size": 13.418,
"_docs": """These weights were produced by following a similar training recipe as on the paper.""",
},
)
......@@ -324,17 +326,6 @@ def ssdlite320_mobilenet_v3_large(
)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
model_urls = _ModelURLs(
{
"ssdlite320_mobilenet_v3_large_coco": SSDLite320_MobileNet_V3_Large_Weights.COCO_V1.url,
}
)
......@@ -24,8 +24,8 @@ def _fake_cast_onnx(v: Tensor) -> float:
def _resize_image_and_masks(
image: Tensor,
self_min_size: float,
self_max_size: float,
self_min_size: int,
self_max_size: int,
target: Optional[Dict[str, Tensor]] = None,
fixed_size: Optional[Tuple[int, int]] = None,
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
......@@ -40,14 +40,24 @@ def _resize_image_and_masks(
if fixed_size is not None:
size = [fixed_size[1], fixed_size[0]]
else:
min_size = torch.min(im_shape).to(dtype=torch.float32)
max_size = torch.max(im_shape).to(dtype=torch.float32)
scale = torch.min(self_min_size / min_size, self_max_size / max_size)
if torch.jit.is_scripting() or torchvision._is_tracing():
min_size = torch.min(im_shape).to(dtype=torch.float32)
max_size = torch.max(im_shape).to(dtype=torch.float32)
self_min_size_f = float(self_min_size)
self_max_size_f = float(self_max_size)
scale = torch.min(self_min_size_f / min_size, self_max_size_f / max_size)
if torchvision._is_tracing():
scale_factor = _fake_cast_onnx(scale)
else:
scale_factor = scale.item()
if torchvision._is_tracing():
scale_factor = _fake_cast_onnx(scale)
else:
scale_factor = scale.item()
# Do it the normal way
min_size = min(im_shape)
max_size = max(im_shape)
scale_factor = min(self_min_size / min_size, self_max_size / max_size)
recompute_scale_factor = True
image = torch.nn.functional.interpolate(
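Both branches above compute the same scale; a plain-Python sketch of the selection logic (the helper name is ours, for illustration only):

def _pick_scale_factor(im_h: int, im_w: int, self_min_size: int, self_max_size: int) -> float:
    # scale the shorter side up to self_min_size, unless that would push the
    # longer side past self_max_size, in which case cap the scale there instead
    min_size = min(im_h, im_w)
    max_size = max(im_h, im_w)
    return min(self_min_size / min_size, self_max_size / max_size)

# e.g. with an 800/1333 budget, a 480x640 image is scaled by 800/480,
# while a very wide 480x2000 image is limited by 1333/2000 instead
assert abs(_pick_scale_factor(480, 640, 800, 1333) - 800 / 480) < 1e-6
assert abs(_pick_scale_factor(480, 2000, 800, 1333) - 1333 / 2000) < 1e-6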
......@@ -76,7 +86,7 @@ class GeneralizedRCNNTransform(nn.Module):
Performs input / target transformation before feeding the data to a GeneralizedRCNN
model.
The transformations it perform are:
The transformations it performs are:
- input normalization (mean subtraction and std division)
- input / target resizing to match min_size / max_size
......@@ -158,9 +168,8 @@ class GeneralizedRCNNTransform(nn.Module):
def torch_choice(self, k: List[int]) -> int:
"""
Implements `random.choice` via torch ops so it can be compiled with
TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
is fixed.
Implements `random.choice` via torch ops, so it can be compiled with
TorchScript and we use PyTorch's RNG (not native RNG)
"""
index = int(torch.empty(1).uniform_(0.0, float(len(k))).item())
return k[index]
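The docstring's point is that the size choice goes through PyTorch's RNG, so it respects ``torch.manual_seed``; a small illustrative check (the candidate sizes below are made up):

import torch

torch.manual_seed(0)
k = [480, 512, 544, 576, 608]
index = int(torch.empty(1).uniform_(0.0, float(len(k))).item())
print(k[index])  # reproducible for a fixed seed, unlike random.choice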
......@@ -174,11 +183,10 @@ class GeneralizedRCNNTransform(nn.Module):
if self.training:
if self._skip_resize:
return image, target
size = float(self.torch_choice(self.min_size))
size = self.torch_choice(self.min_size)
else:
# FIXME assume for now that testing uses the largest scale
size = float(self.min_size[-1])
image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)
size = self.min_size[-1]
image, target = _resize_image_and_masks(image, size, self.max_size, target, self.fixed_size)
if target is None:
return image, target
......
import copy
import math
import warnings
from dataclasses import dataclass
from functools import partial
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
......@@ -239,7 +238,6 @@ class EfficientNet(nn.Module):
num_classes: int = 1000,
norm_layer: Optional[Callable[..., nn.Module]] = None,
last_channel: Optional[int] = None,
**kwargs: Any,
) -> None:
"""
EfficientNet V1 and V2 main class
......@@ -263,16 +261,6 @@ class EfficientNet(nn.Module):
):
raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")
if "block" in kwargs:
warnings.warn(
"The parameter 'block' is deprecated since 0.13 and will be removed 0.15. "
"Please pass this information on 'MBConvConfig.block' instead."
)
if kwargs["block"] is not None:
for s in inverted_residual_setting:
if isinstance(s, MBConvConfig):
s.block = kwargs["block"]
if norm_layer is None:
norm_layer = nn.BatchNorm2d
......@@ -369,7 +357,7 @@ def _efficientnet(
model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
......@@ -464,6 +452,8 @@ class EfficientNet_B0_Weights(WeightsEnum):
"acc@5": 93.532,
}
},
"_ops": 0.386,
"_file_size": 20.451,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -473,7 +463,7 @@ class EfficientNet_B0_Weights(WeightsEnum):
class EfficientNet_B1_Weights(WeightsEnum):
IMAGENET1K_V1 = Weights(
# Weights ported from https://github.com/rwightman/pytorch-image-models/
url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth",
url="https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth",
transforms=partial(
ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC
),
......@@ -486,6 +476,8 @@ class EfficientNet_B1_Weights(WeightsEnum):
"acc@5": 94.186,
}
},
"_ops": 0.687,
"_file_size": 30.134,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -504,6 +496,8 @@ class EfficientNet_B1_Weights(WeightsEnum):
"acc@5": 94.934,
}
},
"_ops": 0.687,
"_file_size": 30.136,
"_docs": """
These weights improve upon the results of the original paper by using a modified version of TorchVision's
`new training recipe
......@@ -530,6 +524,8 @@ class EfficientNet_B2_Weights(WeightsEnum):
"acc@5": 95.310,
}
},
"_ops": 1.088,
"_file_size": 35.174,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -552,6 +548,8 @@ class EfficientNet_B3_Weights(WeightsEnum):
"acc@5": 96.054,
}
},
"_ops": 1.827,
"_file_size": 47.184,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -574,6 +572,8 @@ class EfficientNet_B4_Weights(WeightsEnum):
"acc@5": 96.594,
}
},
"_ops": 4.394,
"_file_size": 74.489,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -596,6 +596,8 @@ class EfficientNet_B5_Weights(WeightsEnum):
"acc@5": 96.628,
}
},
"_ops": 10.266,
"_file_size": 116.864,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -618,6 +620,8 @@ class EfficientNet_B6_Weights(WeightsEnum):
"acc@5": 96.916,
}
},
"_ops": 19.068,
"_file_size": 165.362,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -640,6 +644,8 @@ class EfficientNet_B7_Weights(WeightsEnum):
"acc@5": 96.908,
}
},
"_ops": 37.746,
"_file_size": 254.675,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -664,6 +670,8 @@ class EfficientNet_V2_S_Weights(WeightsEnum):
"acc@5": 96.878,
}
},
"_ops": 8.366,
"_file_size": 82.704,
"_docs": """
These weights improve upon the results of the original paper by using a modified version of TorchVision's
`new training recipe
......@@ -692,6 +700,8 @@ class EfficientNet_V2_M_Weights(WeightsEnum):
"acc@5": 97.156,
}
},
"_ops": 24.582,
"_file_size": 208.01,
"_docs": """
These weights improve upon the results of the original paper by using a modified version of TorchVision's
`new training recipe
......@@ -723,6 +733,8 @@ class EfficientNet_V2_L_Weights(WeightsEnum):
"acc@5": 97.788,
}
},
"_ops": 56.08,
"_file_size": 454.573,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -755,7 +767,9 @@ def efficientnet_b0(
weights = EfficientNet_B0_Weights.verify(weights)
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b0", width_mult=1.0, depth_mult=1.0)
return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
return _efficientnet(
inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs
)
@register_model()
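With ``kwargs.pop("dropout", ...)`` the classifier dropout becomes caller-overridable while the old value stays the default; a quick sketch of the usage this enables (``weights=None`` keeps the example download-free):

from torchvision.models import efficientnet_b0

model = efficientnet_b0(weights=None)                            # unchanged default: dropout=0.2 for B0
model_more_dropout = efficientnet_b0(weights=None, dropout=0.5)  # now accepted without a duplicate-argument error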
......@@ -784,7 +798,9 @@ def efficientnet_b1(
weights = EfficientNet_B1_Weights.verify(weights)
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b1", width_mult=1.0, depth_mult=1.1)
return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
return _efficientnet(
inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs
)
@register_model()
......@@ -813,7 +829,9 @@ def efficientnet_b2(
weights = EfficientNet_B2_Weights.verify(weights)
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b2", width_mult=1.1, depth_mult=1.2)
return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
return _efficientnet(
inverted_residual_setting, kwargs.pop("dropout", 0.3), last_channel, weights, progress, **kwargs
)
@register_model()
......@@ -842,7 +860,14 @@ def efficientnet_b3(
weights = EfficientNet_B3_Weights.verify(weights)
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b3", width_mult=1.2, depth_mult=1.4)
return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
return _efficientnet(
inverted_residual_setting,
kwargs.pop("dropout", 0.3),
last_channel,
weights,
progress,
**kwargs,
)
@register_model()
......@@ -871,7 +896,14 @@ def efficientnet_b4(
weights = EfficientNet_B4_Weights.verify(weights)
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b4", width_mult=1.4, depth_mult=1.8)
return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)
return _efficientnet(
inverted_residual_setting,
kwargs.pop("dropout", 0.4),
last_channel,
weights,
progress,
**kwargs,
)
@register_model()
......@@ -902,7 +934,7 @@ def efficientnet_b5(
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b5", width_mult=1.6, depth_mult=2.2)
return _efficientnet(
inverted_residual_setting,
0.4,
kwargs.pop("dropout", 0.4),
last_channel,
weights,
progress,
......@@ -939,7 +971,7 @@ def efficientnet_b6(
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b6", width_mult=1.8, depth_mult=2.6)
return _efficientnet(
inverted_residual_setting,
0.5,
kwargs.pop("dropout", 0.5),
last_channel,
weights,
progress,
......@@ -976,7 +1008,7 @@ def efficientnet_b7(
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b7", width_mult=2.0, depth_mult=3.1)
return _efficientnet(
inverted_residual_setting,
0.5,
kwargs.pop("dropout", 0.5),
last_channel,
weights,
progress,
......@@ -1014,7 +1046,7 @@ def efficientnet_v2_s(
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
return _efficientnet(
inverted_residual_setting,
0.2,
kwargs.pop("dropout", 0.2),
last_channel,
weights,
progress,
......@@ -1052,7 +1084,7 @@ def efficientnet_v2_m(
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m")
return _efficientnet(
inverted_residual_setting,
0.3,
kwargs.pop("dropout", 0.3),
last_channel,
weights,
progress,
......@@ -1090,28 +1122,10 @@ def efficientnet_v2_l(
inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l")
return _efficientnet(
inverted_residual_setting,
0.4,
kwargs.pop("dropout", 0.4),
last_channel,
weights,
progress,
norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
**kwargs,
)
# The dictionary below is internal implementation detail and will be removed in v0.15
from ._utils import _ModelURLs
model_urls = _ModelURLs(
{
"efficientnet_b0": EfficientNet_B0_Weights.IMAGENET1K_V1.url,
"efficientnet_b1": EfficientNet_B1_Weights.IMAGENET1K_V1.url,
"efficientnet_b2": EfficientNet_B2_Weights.IMAGENET1K_V1.url,
"efficientnet_b3": EfficientNet_B3_Weights.IMAGENET1K_V1.url,
"efficientnet_b4": EfficientNet_B4_Weights.IMAGENET1K_V1.url,
"efficientnet_b5": EfficientNet_B5_Weights.IMAGENET1K_V1.url,
"efficientnet_b6": EfficientNet_B6_Weights.IMAGENET1K_V1.url,
"efficientnet_b7": EfficientNet_B7_Weights.IMAGENET1K_V1.url,
}
)
......@@ -18,7 +18,7 @@ __all__ = ["create_feature_extractor", "get_graph_node_names"]
class LeafModuleAwareTracer(fx.Tracer):
"""
An fx.Tracer that allows the user to specify a set of leaf modules, ie.
An fx.Tracer that allows the user to specify a set of leaf modules, i.e.
modules that are not to be traced through. The resulting graph ends up
having single nodes referencing calls to the leaf modules' forward methods.
"""
......@@ -103,7 +103,7 @@ class NodePathTracer(LeafModuleAwareTracer):
if node.op != "call_module":
# In this case module_qualname from torch.fx doesn't go all the
# way to the leaf function/op so we need to append it
# way to the leaf function/op, so we need to append it
if len(node_qualname) > 0:
# Only append '.' if we are deeper than the top level module
node_qualname += "."
......@@ -136,7 +136,7 @@ class NodePathTracer(LeafModuleAwareTracer):
def _is_subseq(x, y):
"""Check if y is a subseqence of x
"""Check if y is a subsequence of x
https://stackoverflow.com/a/24017747/4391249
"""
iter_x = iter(x)
......@@ -228,7 +228,7 @@ def get_graph_node_names(
tracer_kwargs (dict, optional): a dictionary of keyword arguments for
``NodePathTracer`` (they are eventually passed onto
`torch.fx.Tracer <https://pytorch.org/docs/stable/fx.html#torch.fx.Tracer>`_).
By default it will be set to wrap and make leaf nodes all torchvision ops:
By default, it will be set to wrap and make leaf nodes all torchvision ops:
{"autowrap_modules": (math, torchvision.ops,),"leaf_modules": _get_leaf_modules_for_ops(),}
WARNING: In case the user provides tracer_kwargs, above default arguments will be appended to the user
provided dictionary.
......@@ -391,7 +391,7 @@ def create_feature_extractor(
tracer_kwargs (dict, optional): a dictionary of keyword arguments for
``NodePathTracer`` (which passes them onto it's parent class
`torch.fx.Tracer <https://pytorch.org/docs/stable/fx.html#torch.fx.Tracer>`_).
By default it will be set to wrap and make leaf nodes all torchvision ops:
By default, it will be set to wrap and make leaf nodes all torchvision ops:
{"autowrap_modules": (math, torchvision.ops,),"leaf_modules": _get_leaf_modules_for_ops(),}
WARNING: In case the user provides tracer_kwargs, above default arguments will be appended to the user
provided dictionary.
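For orientation, a minimal sketch of the two helpers documented above; the ResNet-50 node names are just an example, and ``get_graph_node_names`` can be used to list the valid ones:

import torch
import torchvision
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names

model = torchvision.models.resnet50(weights=None)
train_nodes, eval_nodes = get_graph_node_names(model)  # inspect the available node names
extractor = create_feature_extractor(model, return_nodes={"layer2": "feat2", "layer4": "feat4"})
out = extractor(torch.rand(1, 3, 224, 224))
print({k: tuple(v.shape) for k, v in out.items()})  # feat2: (1, 512, 28, 28), feat4: (1, 2048, 7, 7)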
......@@ -544,7 +544,7 @@ def create_feature_extractor(
graph_module.graph.eliminate_dead_code()
graph_module.recompile()
# Keep track of the tracer and graph so we can choose the main one
# Keep track of the tracer and graph, so we can choose the main one
tracers[mode] = tracer
graphs[mode] = graph
......
......@@ -290,6 +290,8 @@ class GoogLeNet_Weights(WeightsEnum):
"acc@5": 89.530,
}
},
"_ops": 1.498,
"_file_size": 49.731,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -330,7 +332,7 @@ def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = T
model = GoogLeNet(**kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if not original_aux_logits:
model.aux_logits = False
model.aux1 = None # type: ignore[assignment]
......@@ -341,15 +343,3 @@ def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = T
)
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from ._utils import _ModelURLs
model_urls = _ModelURLs(
{
# GoogLeNet ported from TensorFlow
"googlenet": GoogLeNet_Weights.IMAGENET1K_V1.url,
}
)
......@@ -48,7 +48,7 @@ class Inception3(nn.Module):
)
init_weights = True
if len(inception_blocks) != 7:
raise ValueError(f"lenght of inception_blocks should be 7 instead of {len(inception_blocks)}")
raise ValueError(f"length of inception_blocks should be 7 instead of {len(inception_blocks)}")
conv_block = inception_blocks[0]
inception_a = inception_blocks[1]
inception_b = inception_blocks[2]
......@@ -422,6 +422,8 @@ class Inception_V3_Weights(WeightsEnum):
"acc@5": 93.450,
}
},
"_ops": 5.713,
"_file_size": 103.903,
"_docs": """These weights are ported from the original paper.""",
},
)
......@@ -468,21 +470,9 @@ def inception_v3(*, weights: Optional[Inception_V3_Weights] = None, progress: bo
model = Inception3(**kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if not original_aux_logits:
model.aux_logits = False
model.AuxLogits = None
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from ._utils import _ModelURLs
model_urls = _ModelURLs(
{
# Inception v3 ported from TensorFlow
"inception_v3_google": Inception_V3_Weights.IMAGENET1K_V1.url,
}
)
import math
from collections import OrderedDict
from functools import partial
from typing import Any, Callable, List, Optional, OrderedDict, Sequence, Tuple
from typing import Any, Callable, List, Optional, Sequence, Tuple
import numpy as np
import torch
......@@ -300,7 +301,7 @@ class PartitionAttentionLayer(nn.Module):
self,
in_channels: int,
head_dim: int,
# partitioning parameteres
# partitioning parameters
partition_size: int,
partition_type: str,
# grid size needs to be known at initialization time
......@@ -426,7 +427,7 @@ class MaxVitLayer(nn.Module):
) -> None:
super().__init__()
layers: OrderedDict[str, Any] = OrderedDict() # type: ignore
layers: OrderedDict = OrderedDict()
# convolutional layer
layers["MBconv"] = MBConv(
......@@ -762,7 +763,7 @@ def _maxvit(
)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
......@@ -785,6 +786,8 @@ class MaxVit_T_Weights(WeightsEnum):
"acc@5": 96.722,
}
},
"_ops": 5.558,
"_file_size": 118.769,
"_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""",
},
)
......
......@@ -88,14 +88,14 @@ def _round_to_multiple_of(val: float, divisor: int, round_up_bias: float = 0.9)
def _get_depths(alpha: float) -> List[int]:
"""Scales tensor depths as in reference MobileNet code, prefers rouding up
"""Scales tensor depths as in reference MobileNet code, prefers rounding up
rather than down."""
depths = [32, 16, 24, 40, 80, 96, 192, 320]
return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]
class MNASNet(torch.nn.Module):
"""MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
"""MNASNet, as described in https://arxiv.org/abs/1807.11626. This
implements the B1 variant of the model.
>>> model = MNASNet(1.0, num_classes=1000)
>>> x = torch.rand(1, 3, 224, 224)
......@@ -231,6 +231,8 @@ class MNASNet0_5_Weights(WeightsEnum):
"acc@5": 87.490,
}
},
"_ops": 0.104,
"_file_size": 8.591,
"_docs": """These weights reproduce closely the results of the paper.""",
},
)
......@@ -251,6 +253,8 @@ class MNASNet0_75_Weights(WeightsEnum):
"acc@5": 90.496,
}
},
"_ops": 0.215,
"_file_size": 12.303,
"_docs": """
These weights were trained from scratch by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
......@@ -273,6 +277,8 @@ class MNASNet1_0_Weights(WeightsEnum):
"acc@5": 91.510,
}
},
"_ops": 0.314,
"_file_size": 16.915,
"_docs": """These weights reproduce closely the results of the paper.""",
},
)
......@@ -293,6 +299,8 @@ class MNASNet1_3_Weights(WeightsEnum):
"acc@5": 93.522,
}
},
"_ops": 0.526,
"_file_size": 24.246,
"_docs": """
These weights were trained from scratch by using TorchVision's `new training recipe
<https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
......@@ -309,7 +317,7 @@ def _mnasnet(alpha: float, weights: Optional[WeightsEnum], progress: bool, **kwa
model = MNASNet(alpha, **kwargs)
if weights:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
......@@ -319,7 +327,7 @@ def _mnasnet(alpha: float, weights: Optional[WeightsEnum], progress: bool, **kwa
def mnasnet0_5(*, weights: Optional[MNASNet0_5_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
"""MNASNet with depth multiplier of 0.5 from
`MnasNet: Platform-Aware Neural Architecture Search for Mobile
<https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
<https://arxiv.org/abs/1807.11626>`_ paper.
Args:
weights (:class:`~torchvision.models.MNASNet0_5_Weights`, optional): The
......@@ -347,7 +355,7 @@ def mnasnet0_5(*, weights: Optional[MNASNet0_5_Weights] = None, progress: bool =
def mnasnet0_75(*, weights: Optional[MNASNet0_75_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
"""MNASNet with depth multiplier of 0.75 from
`MnasNet: Platform-Aware Neural Architecture Search for Mobile
<https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
<https://arxiv.org/abs/1807.11626>`_ paper.
Args:
weights (:class:`~torchvision.models.MNASNet0_75_Weights`, optional): The
......@@ -375,7 +383,7 @@ def mnasnet0_75(*, weights: Optional[MNASNet0_75_Weights] = None, progress: bool
def mnasnet1_0(*, weights: Optional[MNASNet1_0_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
"""MNASNet with depth multiplier of 1.0 from
`MnasNet: Platform-Aware Neural Architecture Search for Mobile
<https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
<https://arxiv.org/abs/1807.11626>`_ paper.
Args:
weights (:class:`~torchvision.models.MNASNet1_0_Weights`, optional): The
......@@ -403,7 +411,7 @@ def mnasnet1_0(*, weights: Optional[MNASNet1_0_Weights] = None, progress: bool =
def mnasnet1_3(*, weights: Optional[MNASNet1_3_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
"""MNASNet with depth multiplier of 1.3 from
`MnasNet: Platform-Aware Neural Architecture Search for Mobile
<https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
<https://arxiv.org/abs/1807.11626>`_ paper.
Args:
weights (:class:`~torchvision.models.MNASNet1_3_Weights`, optional): The
......
......@@ -23,7 +23,7 @@ class InvertedResidual(nn.Module):
super().__init__()
self.stride = stride
if stride not in [1, 2]:
raise ValueError(f"stride should be 1 or 2 insted of {stride}")
raise ValueError(f"stride should be 1 or 2 instead of {stride}")
if norm_layer is None:
norm_layer = nn.BatchNorm2d
......@@ -194,6 +194,8 @@ class MobileNet_V2_Weights(WeightsEnum):
"acc@5": 90.286,
}
},
"_ops": 0.301,
"_file_size": 13.555,
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
)
......@@ -209,6 +211,8 @@ class MobileNet_V2_Weights(WeightsEnum):
"acc@5": 90.822,
}
},
"_ops": 0.301,
"_file_size": 13.598,
"_docs": """
These weights improve upon the results of the original paper by using a modified version of TorchVision's
`new training recipe
......@@ -251,17 +255,6 @@ def mobilenet_v2(
model = MobileNetV2(**kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from ._utils import _ModelURLs
model_urls = _ModelURLs(
{
"mobilenet_v2": MobileNet_V2_Weights.IMAGENET1K_V1.url,
}
)
......@@ -282,7 +282,7 @@ def _mobilenet_v3(
model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
......@@ -307,6 +307,8 @@ class MobileNet_V3_Large_Weights(WeightsEnum):
"acc@5": 91.340,
}
},
"_ops": 0.217,
"_file_size": 21.114,
"_docs": """These weights were trained from scratch by using a simple training recipe.""",
},
)
......@@ -323,6 +325,8 @@ class MobileNet_V3_Large_Weights(WeightsEnum):
"acc@5": 92.566,
}
},
"_ops": 0.217,
"_file_size": 21.107,
"_docs": """
These weights improve marginally upon the results of the original paper by using a modified version of
TorchVision's `new training recipe
......@@ -347,6 +351,8 @@ class MobileNet_V3_Small_Weights(WeightsEnum):
"acc@5": 87.402,
}
},
"_ops": 0.057,
"_file_size": 9.829,
"_docs": """
These weights improve upon the results of the original paper by using a simple training recipe.
""",
......@@ -372,7 +378,7 @@ def mobilenet_v3_large(
weights are used.
progress (bool, optional): If True, displays a progress bar of the
download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3``
**kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py>`_
for more details about this class.
......@@ -403,7 +409,7 @@ def mobilenet_v3_small(
weights are used.
progress (bool, optional): If True, displays a progress bar of the
download to stderr. Default is True.
**kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3``
**kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3``
base class. Please refer to the `source code
<https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py>`_
for more details about this class.
......@@ -415,15 +421,3 @@ def mobilenet_v3_small(
inverted_residual_setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_small", **kwargs)
return _mobilenet_v3(inverted_residual_setting, last_channel, weights, progress, **kwargs)
# The dictionary below is internal implementation detail and will be removed in v0.15
from ._utils import _ModelURLs
model_urls = _ModelURLs(
{
"mobilenet_v3_large": MobileNet_V3_Large_Weights.IMAGENET1K_V1.url,
"mobilenet_v3_small": MobileNet_V3_Small_Weights.IMAGENET1K_V1.url,
}
)
......@@ -35,7 +35,7 @@ class ResidualBlock(nn.Module):
# But in the RAFT training reference, the BatchNorm2d layers are only activated for the first dataset,
# and frozen for the rest of the training process (i.e. set as eval()). The bias term is thus still useful
# for the rest of the datasets. Technically, we could remove the bias for other norm layers like Instance norm
# because these aren't frozen, but we don't bother (also, we woudn't be able to load the original weights).
# because these aren't frozen, but we don't bother (also, we wouldn't be able to load the original weights).
self.convnormrelu1 = Conv2dNormActivation(
in_channels, out_channels, norm_layer=norm_layer, kernel_size=3, stride=stride, bias=True
)
......@@ -318,7 +318,7 @@ class MaskPredictor(nn.Module):
def __init__(self, *, in_channels, hidden_size, multiplier=0.25):
super().__init__()
self.convrelu = Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3)
# 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder
# 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder,
# and we interpolate with all 9 surrounding neighbors. See paper and appendix B.
self.conv = nn.Conv2d(hidden_size, 8 * 8 * 9, 1, padding=0)
......@@ -369,6 +369,19 @@ class CorrBlock(nn.Module):
raise ValueError(
f"Input feature maps should have the same shape, instead got {fmap1.shape} (fmap1.shape) != {fmap2.shape} (fmap2.shape)"
)
# Explaining min_fmap_size below: the fmaps are down-sampled (num_levels - 1) times by a factor of 2.
# The last corr_volume must have at least 2 values (hence the 2* factor), otherwise grid_sample() would
# produce nans in its output.
min_fmap_size = 2 * (2 ** (self.num_levels - 1))
if any(fmap_size < min_fmap_size for fmap_size in fmap1.shape[-2:]):
raise ValueError(
"Feature maps are too small to be down-sampled by the correlation pyramid. "
f"H and W of feature maps should be at least {min_fmap_size}; got: {fmap1.shape[-2:]}. "
"Remember that input images to the model are downsampled by 8, so that means their "
f"dimensions should be at least 8 * {min_fmap_size} = {8 * min_fmap_size}."
)
corr_volume = self._compute_corr_volume(fmap1, fmap2)
batch_size, h, w, num_channels, _, _ = corr_volume.shape # _, _ = h, w
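To make the new check concrete: with the usual four pyramid levels (``num_levels = 4`` is our assumption here; adjust for other configurations), the thresholds work out as in this sketch:

num_levels = 4  # assumed correlation-pyramid depth
min_fmap_size = 2 * (2 ** (num_levels - 1))  # = 16: the last level must keep >= 2 values per dim
min_image_size = 8 * min_fmap_size           # = 128: inputs are downsampled by 8 before the pyramid
print(min_fmap_size, min_image_size)         # 16 128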
......@@ -430,7 +443,7 @@ class RAFT(nn.Module):
Its input is ``image1``. As in the original implementation, its output will be split into 2 parts:
- one part will be used as the actual "context", passed to the recurrent unit of the ``update_block``
- one part will be used to initialize the hidden state of the of the recurrent unit of
- one part will be used to initialize the hidden state of the recurrent unit of
the ``update_block``
These 2 parts are split according to the ``hidden_state_size`` of the ``update_block``, so the output
......@@ -474,7 +487,7 @@ class RAFT(nn.Module):
if (h, w) != image2.shape[-2:]:
raise ValueError(f"input images should have the same shape, instead got ({h}, {w}) != {image2.shape[-2:]}")
if not (h % 8 == 0) and (w % 8 == 0):
raise ValueError(f"input image H and W should be divisible by 8, insted got {h} (h) and {w} (w)")
raise ValueError(f"input image H and W should be divisible by 8, instead got {h} (h) and {w} (w)")
fmaps = self.feature_encoder(torch.cat([image1, image2], dim=0))
fmap1, fmap2 = torch.chunk(fmaps, chunks=2, dim=0)
......@@ -552,6 +565,8 @@ class Raft_Large_Weights(WeightsEnum):
"Sintel-Train-Finalpass": {"epe": 2.7894},
"Kitti-Train": {"per_image_epe": 5.0172, "fl_all": 17.4506},
},
"_ops": 211.007,
"_file_size": 20.129,
"_docs": """These weights were ported from the original paper. They
are trained on :class:`~torchvision.datasets.FlyingChairs` +
:class:`~torchvision.datasets.FlyingThings3D`.""",
......@@ -570,6 +585,8 @@ class Raft_Large_Weights(WeightsEnum):
"Sintel-Train-Finalpass": {"epe": 2.7161},
"Kitti-Train": {"per_image_epe": 4.5118, "fl_all": 16.0679},
},
"_ops": 211.007,
"_file_size": 20.129,
"_docs": """These weights were trained from scratch on
:class:`~torchvision.datasets.FlyingChairs` +
:class:`~torchvision.datasets.FlyingThings3D`.""",
......@@ -588,6 +605,8 @@ class Raft_Large_Weights(WeightsEnum):
"Sintel-Test-Cleanpass": {"epe": 1.94},
"Sintel-Test-Finalpass": {"epe": 3.18},
},
"_ops": 211.007,
"_file_size": 20.129,
"_docs": """
These weights were ported from the original paper. They are
trained on :class:`~torchvision.datasets.FlyingChairs` +
......@@ -612,6 +631,8 @@ class Raft_Large_Weights(WeightsEnum):
"Sintel-Test-Cleanpass": {"epe": 1.819},
"Sintel-Test-Finalpass": {"epe": 3.067},
},
"_ops": 211.007,
"_file_size": 20.129,
"_docs": """
These weights were trained from scratch. They are
pre-trained on :class:`~torchvision.datasets.FlyingChairs` +
......@@ -636,6 +657,8 @@ class Raft_Large_Weights(WeightsEnum):
"_metrics": {
"Kitti-Test": {"fl_all": 5.10},
},
"_ops": 211.007,
"_file_size": 20.129,
"_docs": """
These weights were ported from the original paper. They are
pre-trained on :class:`~torchvision.datasets.FlyingChairs` +
......@@ -657,6 +680,8 @@ class Raft_Large_Weights(WeightsEnum):
"_metrics": {
"Kitti-Test": {"fl_all": 5.19},
},
"_ops": 211.007,
"_file_size": 20.129,
"_docs": """
These weights were trained from scratch. They are
pre-trained on :class:`~torchvision.datasets.FlyingChairs` +
......@@ -698,6 +723,8 @@ class Raft_Small_Weights(WeightsEnum):
"Sintel-Train-Finalpass": {"epe": 3.2790},
"Kitti-Train": {"per_image_epe": 7.6557, "fl_all": 25.2801},
},
"_ops": 47.655,
"_file_size": 3.821,
"_docs": """These weights were ported from the original paper. They
are trained on :class:`~torchvision.datasets.FlyingChairs` +
:class:`~torchvision.datasets.FlyingThings3D`.""",
......@@ -715,6 +742,8 @@ class Raft_Small_Weights(WeightsEnum):
"Sintel-Train-Finalpass": {"epe": 3.2831},
"Kitti-Train": {"per_image_epe": 7.5978, "fl_all": 25.2369},
},
"_ops": 47.655,
"_file_size": 3.821,
"_docs": """These weights were trained from scratch on
:class:`~torchvision.datasets.FlyingChairs` +
:class:`~torchvision.datasets.FlyingThings3D`.""",
......@@ -802,7 +831,7 @@ def _raft(
)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
......
......@@ -108,7 +108,7 @@ class QuantizableGoogLeNet(GoogLeNet):
class GoogLeNet_QuantizedWeights(WeightsEnum):
IMAGENET1K_FBGEMM_V1 = Weights(
url="https://download.pytorch.org/models/quantized/googlenet_fbgemm-c00238cf.pth",
url="https://download.pytorch.org/models/quantized/googlenet_fbgemm-c81f6644.pth",
transforms=partial(ImageClassification, crop_size=224),
meta={
"num_params": 6624904,
......@@ -123,6 +123,8 @@ class GoogLeNet_QuantizedWeights(WeightsEnum):
"acc@5": 89.404,
}
},
"_ops": 1.498,
"_file_size": 12.618,
"_docs": """
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
weights listed below.
......@@ -195,7 +197,7 @@ def googlenet(
quantize_model(model, backend)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if not original_aux_logits:
model.aux_logits = False
model.aux1 = None # type: ignore[assignment]
......@@ -206,16 +208,3 @@ def googlenet(
)
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
from ..googlenet import model_urls # noqa: F401
quant_model_urls = _ModelURLs(
{
# fp32 GoogLeNet ported from TensorFlow, with weights quantized in PyTorch
"googlenet_fbgemm": GoogLeNet_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
}
)
......@@ -168,7 +168,7 @@ class QuantizableInception3(inception_module.Inception3):
class Inception_V3_QuantizedWeights(WeightsEnum):
IMAGENET1K_FBGEMM_V1 = Weights(
url="https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-71447a44.pth",
url="https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-a2837893.pth",
transforms=partial(ImageClassification, crop_size=299, resize_size=342),
meta={
"num_params": 27161264,
......@@ -183,6 +183,8 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
"acc@5": 93.354,
}
},
"_ops": 5.713,
"_file_size": 23.146,
"_docs": """
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
weights listed below.
......@@ -263,22 +265,9 @@ def inception_v3(
if quantize and not original_aux_logits:
model.aux_logits = False
model.AuxLogits = None
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if not quantize and not original_aux_logits:
model.aux_logits = False
model.AuxLogits = None
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
from ..inception import model_urls # noqa: F401
quant_model_urls = _ModelURLs(
{
# fp32 weights ported from TensorFlow, quantized in PyTorch
"inception_v3_google_fbgemm": Inception_V3_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
}
)
......@@ -80,6 +80,8 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
"acc@5": 90.150,
}
},
"_ops": 0.301,
"_file_size": 3.423,
"_docs": """
These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
weights listed below.
......@@ -147,18 +149,6 @@ def mobilenet_v2(
quantize_model(model, backend)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
from ..mobilenetv2 import model_urls # noqa: F401
quant_model_urls = _ModelURLs(
{
"mobilenet_v2_qnnpack": MobileNet_V2_QuantizedWeights.IMAGENET1K_QNNPACK_V1.url,
}
)
......@@ -149,7 +149,7 @@ def _mobilenet_v3_model(
torch.ao.quantization.prepare_qat(model, inplace=True)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
if quantize:
torch.ao.quantization.convert(model, inplace=True)
......@@ -175,6 +175,8 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
"acc@5": 90.858,
}
},
"_ops": 0.217,
"_file_size": 21.554,
"_docs": """
These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
weights listed below.
......@@ -233,15 +235,3 @@ def mobilenet_v3_large(
inverted_residual_setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_large", **kwargs)
return _mobilenet_v3_model(inverted_residual_setting, last_channel, weights, progress, quantize, **kwargs)
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
from ..mobilenetv3 import model_urls # noqa: F401
quant_model_urls = _ModelURLs(
{
"mobilenet_v3_large_qnnpack": MobileNet_V3_Large_QuantizedWeights.IMAGENET1K_QNNPACK_V1.url,
}
)
......@@ -144,7 +144,7 @@ def _resnet(
quantize_model(model, backend)
if weights is not None:
model.load_state_dict(weights.get_state_dict(progress=progress))
model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
return model
......@@ -175,6 +175,8 @@ class ResNet18_QuantizedWeights(WeightsEnum):
"acc@5": 88.882,
}
},
"_ops": 1.814,
"_file_size": 11.238,
},
)
DEFAULT = IMAGENET1K_FBGEMM_V1
......@@ -194,6 +196,8 @@ class ResNet50_QuantizedWeights(WeightsEnum):
"acc@5": 92.814,
}
},
"_ops": 4.089,
"_file_size": 24.759,
},
)
IMAGENET1K_FBGEMM_V2 = Weights(
......@@ -209,6 +213,8 @@ class ResNet50_QuantizedWeights(WeightsEnum):
"acc@5": 94.976,
}
},
"_ops": 4.089,
"_file_size": 24.953,
},
)
DEFAULT = IMAGENET1K_FBGEMM_V2
......@@ -228,6 +234,8 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum):
"acc@5": 94.480,
}
},
"_ops": 16.414,
"_file_size": 86.034,
},
)
IMAGENET1K_FBGEMM_V2 = Weights(
......@@ -243,6 +251,8 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum):
"acc@5": 96.132,
}
},
"_ops": 16.414,
"_file_size": 86.645,
},
)
DEFAULT = IMAGENET1K_FBGEMM_V2
......@@ -263,6 +273,8 @@ class ResNeXt101_64X4D_QuantizedWeights(WeightsEnum):
"acc@5": 96.326,
}
},
"_ops": 15.46,
"_file_size": 81.556,
},
)
DEFAULT = IMAGENET1K_FBGEMM_V1
......@@ -470,17 +482,3 @@ def resnext101_64x4d(
_ovewrite_named_param(kwargs, "groups", 64)
_ovewrite_named_param(kwargs, "width_per_group", 4)
return _resnet(QuantizableBottleneck, [3, 4, 23, 3], weights, progress, quantize, **kwargs)
# The dictionary below is internal implementation detail and will be removed in v0.15
from .._utils import _ModelURLs
from ..resnet import model_urls # noqa: F401
quant_model_urls = _ModelURLs(
{
"resnet18_fbgemm": ResNet18_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
"resnet50_fbgemm": ResNet50_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
"resnext101_32x8d_fbgemm": ResNeXt101_32X8D_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
}
)