OpenDAS / vision — Commits

Commit cc26cd81, authored Nov 27, 2023 by panning
merge v0.16.0
Parents: f78f29f5, fbb4cc54
Changes: 370
Showing 20 changed files with 232 additions and 290 deletions (+232, -290)
torchvision/models/detection/mask_rcnn.py          +12  -19
torchvision/models/detection/retinanet.py          +11  -18
torchvision/models/detection/roi_heads.py           +3   -3
torchvision/models/detection/ssd.py                +10  -30
torchvision/models/detection/ssdlite.py             +4  -13
torchvision/models/detection/transform.py          +24  -16
torchvision/models/efficientnet.py                 +57  -43
torchvision/models/feature_extraction.py            +6   -6
torchvision/models/googlenet.py                     +3  -13
torchvision/models/inception.py                     +4  -14
torchvision/models/maxvit.py                        +7   -4
torchvision/models/mnasnet.py                      +15   -7
torchvision/models/mobilenetv2.py                   +6  -13
torchvision/models/mobilenetv3.py                   +9  -15
torchvision/models/optical_flow/raft.py            +34   -5
torchvision/models/quantization/googlenet.py        +4  -15
torchvision/models/quantization/inception.py        +4  -15
torchvision/models/quantization/mobilenetv2.py      +3  -13
torchvision/models/quantization/mobilenetv3.py      +3  -13
torchvision/models/quantization/resnet.py          +13  -15
torchvision/models/detection/mask_rcnn.py

@@ -31,9 +31,9 @@ class MaskRCNN(FasterRCNN):
     The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
     image, and should be in 0-1 range. Different images can have different sizes.
-    The behavior of the model changes depending if it is in training or evaluation mode.
+    The behavior of the model changes depending on if it is in training or evaluation mode.
-    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
+    During training, the model expects both the input tensors and targets (list of dictionary),
     containing:
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
           ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.

@@ -56,7 +56,7 @@ class MaskRCNN(FasterRCNN):
     Args:
         backbone (nn.Module): the network used to compute the features for the model.
-            It should contain a out_channels attribute, which indicates the number of output
+            It should contain an out_channels attribute, which indicates the number of output
             channels that each feature map has (and it should be the same for all feature maps).
             The backbone should return a single Tensor or and OrderedDict[Tensor].
         num_classes (int): number of output classes of the model (including the background).

@@ -123,7 +123,7 @@ class MaskRCNN(FasterRCNN):
         >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
         >>> # MaskRCNN needs to know the number of
-        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280,
         >>> # so we need to add it here
         >>> backbone.out_channels = 1280
         >>>
         >>> # let's make the RPN generate 5 x 3 anchors per spatial

@@ -370,6 +370,8 @@ class MaskRCNN_ResNet50_FPN_Weights(WeightsEnum):
                 "mask_map": 34.6,
             }
         },
+        "_ops": 134.38,
+        "_file_size": 169.84,
         "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
     },
 )

@@ -390,6 +392,8 @@ class MaskRCNN_ResNet50_FPN_V2_Weights(WeightsEnum):
                 "mask_map": 41.8,
             }
         },
+        "_ops": 333.577,
+        "_file_size": 177.219,
         "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
     },
 )

@@ -418,9 +422,9 @@ def maskrcnn_resnet50_fpn(
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
     image, and should be in ``0-1`` range. Different images can have different sizes.
-    The behavior of the model changes depending if it is in training or evaluation mode.
+    The behavior of the model changes depending on if it is in training or evaluation mode.
-    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
+    During training, the model expects both the input tensors and targets (list of dictionary),
     containing:
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with

@@ -497,7 +501,7 @@ def maskrcnn_resnet50_fpn(
     model = MaskRCNN(backbone, num_classes=num_classes, **kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
         if weights == MaskRCNN_ResNet50_FPN_Weights.COCO_V1:
             overwrite_eps(model, 0.0)

@@ -578,17 +582,6 @@ def maskrcnn_resnet50_fpn_v2(
     )

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "maskrcnn_resnet50_fpn_coco": MaskRCNN_ResNet50_FPN_Weights.COCO_V1.url,
-    }
-)
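Note on the recurring `check_hash=True` change in this commit: `WeightsEnum.get_state_dict()` forwards its keyword arguments to `torch.hub.load_state_dict_from_url`, so `check_hash=True` makes the download fail loudly if the checkpoint's hash does not match the hash embedded in its filename. A minimal usage sketch (not part of the diff; assumes torchvision >= 0.16):

    from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights, maskrcnn_resnet50_fpn

    weights = MaskRCNN_ResNet50_FPN_Weights.COCO_V1
    # The builder now calls weights.get_state_dict(progress=progress, check_hash=True) internally,
    # so a corrupted or tampered download is rejected instead of being silently loaded.
    model = maskrcnn_resnet50_fpn(weights=weights)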
torchvision/models/detection/retinanet.py

@@ -327,9 +327,9 @@ class RetinaNet(nn.Module):
     The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
     image, and should be in 0-1 range. Different images can have different sizes.
-    The behavior of the model changes depending if it is in training or evaluation mode.
+    The behavior of the model changes depending on if it is in training or evaluation mode.
-    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
+    During training, the model expects both the input tensors and targets (list of dictionary),
     containing:
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
           ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.

@@ -382,7 +382,7 @@ class RetinaNet(nn.Module):
         >>> # only the features
         >>> backbone = torchvision.models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT).features
         >>> # RetinaNet needs to know the number of
-        >>> # output channels in a backbone. For mobilenet_v2, it's 1280
+        >>> # output channels in a backbone. For mobilenet_v2, it's 1280,
         >>> # so we need to add it here
         >>> backbone.out_channels = 1280
         >>>

@@ -690,6 +690,8 @@ class RetinaNet_ResNet50_FPN_Weights(WeightsEnum):
                 "box_map": 36.4,
             }
         },
+        "_ops": 151.54,
+        "_file_size": 130.267,
         "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
     },
 )

@@ -709,6 +711,8 @@ class RetinaNet_ResNet50_FPN_V2_Weights(WeightsEnum):
                 "box_map": 41.5,
             }
         },
+        "_ops": 152.238,
+        "_file_size": 146.037,
         "_docs": """These weights were produced using an enhanced training recipe to boost the model accuracy.""",
     },
 )

@@ -739,9 +743,9 @@ def retinanet_resnet50_fpn(
     The input to the model is expected to be a list of tensors, each of shape ``[C, H, W]``, one for each
     image, and should be in ``0-1`` range. Different images can have different sizes.
-    The behavior of the model changes depending if it is in training or evaluation mode.
+    The behavior of the model changes depending on if it is in training or evaluation mode.
-    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
+    During training, the model expects both the input tensors and targets (list of dictionary),
     containing:
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with

@@ -811,7 +815,7 @@ def retinanet_resnet50_fpn(
     model = RetinaNet(backbone, num_classes, **kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
         if weights == RetinaNet_ResNet50_FPN_Weights.COCO_V1:
             overwrite_eps(model, 0.0)

@@ -890,17 +894,6 @@ def retinanet_resnet50_fpn_v2(
     model = RetinaNet(backbone, num_classes, anchor_generator=anchor_generator, head=head, **kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "retinanet_resnet50_fpn_coco": RetinaNet_ResNet50_FPN_Weights.COCO_V1.url,
-    }
-)
torchvision/models/detection/roi_heads.py

@@ -315,7 +315,7 @@ def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
     valid = torch.cat(valid, dim=0).to(dtype=torch.uint8)
     valid = torch.where(valid)[0]

-    # torch.mean (in binary_cross_entropy_with_logits) does'nt
+    # torch.mean (in binary_cross_entropy_with_logits) doesn't
     # accept empty tensors, so handle it sepaartely
     if keypoint_targets.numel() == 0 or len(valid) == 0:
         return keypoint_logits.sum() * 0

@@ -746,7 +746,7 @@ class RoIHeads(nn.Module):
                 if not t["boxes"].dtype in floating_point_types:
                     raise TypeError(f"target boxes must of float type, instead got {t['boxes'].dtype}")
                 if not t["labels"].dtype == torch.int64:
-                    raise TypeError("target labels must of int64 type, instead got {t['labels'].dtype}")
+                    raise TypeError(f"target labels must of int64 type, instead got {t['labels'].dtype}")
                 if self.has_keypoint():
                     if not t["keypoints"].dtype == torch.float32:
                         raise TypeError(f"target keypoints must of float type, instead got {t['keypoints'].dtype}")

@@ -787,7 +787,7 @@ class RoIHeads(nn.Module):
             mask_proposals = [p["boxes"] for p in result]
             if self.training:
                 if matched_idxs is None:
-                    raise ValueError("if in trainning, matched_idxs should not be None")
+                    raise ValueError("if in training, matched_idxs should not be None")

                 # during training, only focus on positive boxes
                 num_images = len(proposals)
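The second hunk above fixes a missing f-string prefix: without it, the braces are emitted literally rather than interpolated. A small illustration with a hypothetical dtype value:

    dtype = "torch.float32"
    print("target labels must of int64 type, instead got {t['labels'].dtype}")  # braces printed verbatim
    print(f"target labels must of int64 type, instead got {dtype}")             # value actually interpolated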
torchvision/models/detection/ssd.py

@@ -39,6 +39,8 @@ class SSD300_VGG16_Weights(WeightsEnum):
                 "box_map": 25.1,
             }
         },
+        "_ops": 34.858,
+        "_file_size": 135.988,
         "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
     },
 )

@@ -126,12 +128,12 @@ class SSD(nn.Module):
     Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.

     The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
-    image, and should be in 0-1 range. Different images can have different sizes but they will be resized
+    image, and should be in 0-1 range. Different images can have different sizes, but they will be resized
     to a fixed size before passing it to the backbone.
-    The behavior of the model changes depending if it is in training or evaluation mode.
+    The behavior of the model changes depending on if it is in training or evaluation mode.
-    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
+    During training, the model expects both the input tensors and targets (list of dictionary),
     containing:
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
           ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.

@@ -554,7 +556,7 @@ def _vgg_extractor(backbone: VGG, highres: bool, trainable_layers: int):
     stage_indices = [0] + [i for i, b in enumerate(backbone) if isinstance(b, nn.MaxPool2d)][:-1]
     num_stages = len(stage_indices)

-    # find the index of the layer from which we wont freeze
+    # find the index of the layer from which we won't freeze
     torch._assert(
         0 <= trainable_layers <= num_stages,
         f"trainable_layers should be in the range [0, {num_stages}]. Instead got {trainable_layers}",

@@ -588,12 +590,12 @@ def ssd300_vgg16(
     .. betastatus:: detection module

     The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
-    image, and should be in 0-1 range. Different images can have different sizes but they will be resized
+    image, and should be in 0-1 range. Different images can have different sizes, but they will be resized
     to a fixed size before passing it to the backbone.
-    The behavior of the model changes depending if it is in training or evaluation mode.
+    The behavior of the model changes depending on if it is in training or evaluation mode.
-    During training, the model expects both the input tensors, as well as a targets (list of dictionary),
+    During training, the model expects both the input tensors and targets (list of dictionary),
     containing:
         - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with

@@ -675,28 +677,6 @@ def ssd300_vgg16(
     model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "ssd300_vgg16_coco": SSD300_VGG16_Weights.COCO_V1.url,
-    }
-)
-
-backbone_urls = _ModelURLs(
-    {
-        # We port the features of a VGG16 backbone trained by amdegroot because unlike the one on TorchVision, it uses
-        # the same input standardization method as the paper.
-        # Ref: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth
-        # Only the `features` weights have proper values, those on the `classifier` module are filled with nans.
-        "vgg16_features": VGG16_Weights.IMAGENET1K_FEATURES.url,
-    }
-)
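The deleted `model_urls` / `backbone_urls` dictionaries were already deprecated; the same URLs stay reachable through the weight enums. A sketch of the equivalent lookups after this commit (assuming torchvision >= 0.16):

    from torchvision.models import VGG16_Weights
    from torchvision.models.detection import SSD300_VGG16_Weights

    print(SSD300_VGG16_Weights.COCO_V1.url)        # replaces model_urls["ssd300_vgg16_coco"]
    print(VGG16_Weights.IMAGENET1K_FEATURES.url)   # replaces backbone_urls["vgg16_features"]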
torchvision/models/detection/ssdlite.py

@@ -172,7 +172,7 @@ def _mobilenet_extractor(
     stage_indices = [0] + [i for i, b in enumerate(backbone) if getattr(b, "_is_cn", False)] + [len(backbone) - 1]
     num_stages = len(stage_indices)

-    # find the index of the layer from which we wont freeze
+    # find the index of the layer from which we won't freeze
     if not 0 <= trainable_layers <= num_stages:
         raise ValueError("trainable_layers should be in the range [0, {num_stages}], instead got {trainable_layers}")
     freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]

@@ -198,6 +198,8 @@ class SSDLite320_MobileNet_V3_Large_Weights(WeightsEnum):
                 "box_map": 21.3,
             }
         },
+        "_ops": 0.583,
+        "_file_size": 13.418,
         "_docs": """These weights were produced by following a similar training recipe as on the paper.""",
     },
 )

@@ -324,17 +326,6 @@ def ssdlite320_mobilenet_v3_large(
     )

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "ssdlite320_mobilenet_v3_large_coco": SSDLite320_MobileNet_V3_Large_Weights.COCO_V1.url,
-    }
-)
torchvision/models/detection/transform.py

@@ -24,8 +24,8 @@ def _fake_cast_onnx(v: Tensor) -> float:
 def _resize_image_and_masks(
     image: Tensor,
-    self_min_size: float,
-    self_max_size: float,
+    self_min_size: int,
+    self_max_size: int,
     target: Optional[Dict[str, Tensor]] = None,
     fixed_size: Optional[Tuple[int, int]] = None,
 ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:

@@ -40,14 +40,24 @@ def _resize_image_and_masks(
     if fixed_size is not None:
         size = [fixed_size[1], fixed_size[0]]
     else:
-        min_size = torch.min(im_shape).to(dtype=torch.float32)
-        max_size = torch.max(im_shape).to(dtype=torch.float32)
-        scale = torch.min(self_min_size / min_size, self_max_size / max_size)
-
-        if torchvision._is_tracing():
-            scale_factor = _fake_cast_onnx(scale)
-        else:
-            scale_factor = scale.item()
+        if torch.jit.is_scripting() or torchvision._is_tracing():
+            min_size = torch.min(im_shape).to(dtype=torch.float32)
+            max_size = torch.max(im_shape).to(dtype=torch.float32)
+            self_min_size_f = float(self_min_size)
+            self_max_size_f = float(self_max_size)
+            scale = torch.min(self_min_size_f / min_size, self_max_size_f / max_size)
+
+            if torchvision._is_tracing():
+                scale_factor = _fake_cast_onnx(scale)
+            else:
+                scale_factor = scale.item()
+        else:
+            # Do it the normal way
+            min_size = min(im_shape)
+            max_size = max(im_shape)
+            scale_factor = min(self_min_size / min_size, self_max_size / max_size)

         recompute_scale_factor = True

     image = torch.nn.functional.interpolate(

@@ -76,7 +86,7 @@ class GeneralizedRCNNTransform(nn.Module):
     Performs input / target transformation before feeding the data to a GeneralizedRCNN
     model.

-    The transformations it perform are:
+    The transformations it performs are:
         - input normalization (mean subtraction and std division)
         - input / target resizing to match min_size / max_size

@@ -158,9 +168,8 @@ class GeneralizedRCNNTransform(nn.Module):
     def torch_choice(self, k: List[int]) -> int:
         """
-        Implements `random.choice` via torch ops so it can be compiled with
-        TorchScript. Remove if https://github.com/pytorch/pytorch/issues/25803
-        is fixed.
+        Implements `random.choice` via torch ops, so it can be compiled with
+        TorchScript and we use PyTorch's RNG (not native RNG)
         """
         index = int(torch.empty(1).uniform_(0.0, float(len(k))).item())
         return k[index]

@@ -174,11 +183,10 @@ class GeneralizedRCNNTransform(nn.Module):
         if self.training:
             if self._skip_resize:
                 return image, target
-            size = float(self.torch_choice(self.min_size))
+            size = self.torch_choice(self.min_size)
         else:
             # FIXME assume for now that testing uses the largest scale
-            size = float(self.min_size[-1])
-        image, target = _resize_image_and_masks(image, size, float(self.max_size), target, self.fixed_size)
+            size = self.min_size[-1]
+        image, target = _resize_image_and_masks(image, size, self.max_size, target, self.fixed_size)

         if target is None:
             return image, target
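The `_resize_image_and_masks` rework keeps the tensor-based arithmetic only for the TorchScript / ONNX-tracing paths and computes the scale with plain Python numbers otherwise, which is why the callers in `resize()` no longer pre-cast `min_size` / `max_size` to float. A simplified sketch of the new eager branch (hypothetical helper, assuming `im_shape` is an (H, W) pair):

    def eager_scale_factor(im_shape, self_min_size: int, self_max_size: int) -> float:
        # "Do it the normal way": no tensor ops, the int sizes divide cleanly into floats
        min_size = min(im_shape)
        max_size = max(im_shape)
        return min(self_min_size / min_size, self_max_size / max_size)

    print(eager_scale_factor((480, 640), 800, 1333))  # ~1.667, limited by 800 / 480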
torchvision/models/efficientnet.py

 import copy
 import math
-import warnings
 from dataclasses import dataclass
 from functools import partial
 from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union

@@ -239,7 +238,6 @@ class EfficientNet(nn.Module):
         num_classes: int = 1000,
         norm_layer: Optional[Callable[..., nn.Module]] = None,
         last_channel: Optional[int] = None,
-        **kwargs: Any,
     ) -> None:
         """
         EfficientNet V1 and V2 main class

@@ -263,16 +261,6 @@ class EfficientNet(nn.Module):
         ):
             raise TypeError("The inverted_residual_setting should be List[MBConvConfig]")

-        if "block" in kwargs:
-            warnings.warn(
-                "The parameter 'block' is deprecated since 0.13 and will be removed 0.15. "
-                "Please pass this information on 'MBConvConfig.block' instead."
-            )
-            if kwargs["block"] is not None:
-                for s in inverted_residual_setting:
-                    if isinstance(s, MBConvConfig):
-                        s.block = kwargs["block"]
-
         if norm_layer is None:
             norm_layer = nn.BatchNorm2d

@@ -369,7 +357,7 @@ def _efficientnet(
     model = EfficientNet(inverted_residual_setting, dropout, last_channel=last_channel, **kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model

@@ -464,6 +452,8 @@ class EfficientNet_B0_Weights(WeightsEnum):
                 "acc@5": 93.532,
             }
         },
+        "_ops": 0.386,
+        "_file_size": 20.451,
         "_docs": """These weights are ported from the original paper.""",

@@ -473,7 +463,7 @@ class EfficientNet_B1_Weights(WeightsEnum):
 class EfficientNet_B1_Weights(WeightsEnum):
     IMAGENET1K_V1 = Weights(
         # Weights ported from https://github.com/rwightman/pytorch-image-models/
-        url="https://download.pytorch.org/models/efficientnet_b1_rwightman-533bc792.pth",
+        url="https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth",
         transforms=partial(ImageClassification, crop_size=240, resize_size=256, interpolation=InterpolationMode.BICUBIC),

@@ -486,6 +476,8 @@ class EfficientNet_B1_Weights(WeightsEnum):
                 "acc@5": 94.186,
             }
         },
+        "_ops": 0.687,
+        "_file_size": 30.134,
         "_docs": """These weights are ported from the original paper.""",

@@ -504,6 +496,8 @@ class EfficientNet_B1_Weights(WeightsEnum):
                 "acc@5": 94.934,
             }
         },
+        "_ops": 0.687,
+        "_file_size": 30.136,
         "_docs": """
             These weights improve upon the results of the original paper by using a modified version of TorchVision's
             `new training recipe

@@ -530,6 +524,8 @@ class EfficientNet_B2_Weights(WeightsEnum):
                 "acc@5": 95.310,
             }
         },
+        "_ops": 1.088,
+        "_file_size": 35.174,
         "_docs": """These weights are ported from the original paper.""",

@@ -552,6 +548,8 @@ class EfficientNet_B3_Weights(WeightsEnum):
                 "acc@5": 96.054,
             }
         },
+        "_ops": 1.827,
+        "_file_size": 47.184,
         "_docs": """These weights are ported from the original paper.""",

@@ -574,6 +572,8 @@ class EfficientNet_B4_Weights(WeightsEnum):
                 "acc@5": 96.594,
             }
         },
+        "_ops": 4.394,
+        "_file_size": 74.489,
         "_docs": """These weights are ported from the original paper.""",

@@ -596,6 +596,8 @@ class EfficientNet_B5_Weights(WeightsEnum):
                 "acc@5": 96.628,
             }
         },
+        "_ops": 10.266,
+        "_file_size": 116.864,
         "_docs": """These weights are ported from the original paper.""",

@@ -618,6 +620,8 @@ class EfficientNet_B6_Weights(WeightsEnum):
                 "acc@5": 96.916,
             }
         },
+        "_ops": 19.068,
+        "_file_size": 165.362,
         "_docs": """These weights are ported from the original paper.""",

@@ -640,6 +644,8 @@ class EfficientNet_B7_Weights(WeightsEnum):
                 "acc@5": 96.908,
             }
         },
+        "_ops": 37.746,
+        "_file_size": 254.675,
         "_docs": """These weights are ported from the original paper.""",

@@ -664,6 +670,8 @@ class EfficientNet_V2_S_Weights(WeightsEnum):
                 "acc@5": 96.878,
             }
         },
+        "_ops": 8.366,
+        "_file_size": 82.704,
         "_docs": """
             These weights improve upon the results of the original paper by using a modified version of TorchVision's
             `new training recipe

@@ -692,6 +700,8 @@ class EfficientNet_V2_M_Weights(WeightsEnum):
                 "acc@5": 97.156,
             }
         },
+        "_ops": 24.582,
+        "_file_size": 208.01,
         "_docs": """
             These weights improve upon the results of the original paper by using a modified version of TorchVision's
             `new training recipe

@@ -723,6 +733,8 @@ class EfficientNet_V2_L_Weights(WeightsEnum):
                 "acc@5": 97.788,
             }
         },
+        "_ops": 56.08,
+        "_file_size": 454.573,
         "_docs": """These weights are ported from the original paper.""",

@@ -755,7 +767,9 @@ def efficientnet_b0(
     weights = EfficientNet_B0_Weights.verify(weights)

     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b0", width_mult=1.0, depth_mult=1.0)
-    return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs
+    )

 @register_model()

@@ -784,7 +798,9 @@ def efficientnet_b1(
     weights = EfficientNet_B1_Weights.verify(weights)

     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b1", width_mult=1.0, depth_mult=1.1)
-    return _efficientnet(inverted_residual_setting, 0.2, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting, kwargs.pop("dropout", 0.2), last_channel, weights, progress, **kwargs
+    )

 @register_model()

@@ -813,7 +829,9 @@ def efficientnet_b2(
     weights = EfficientNet_B2_Weights.verify(weights)

     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b2", width_mult=1.1, depth_mult=1.2)
-    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting, kwargs.pop("dropout", 0.3), last_channel, weights, progress, **kwargs
+    )

 @register_model()

@@ -842,7 +860,14 @@ def efficientnet_b3(
     weights = EfficientNet_B3_Weights.verify(weights)

     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b3", width_mult=1.2, depth_mult=1.4)
-    return _efficientnet(inverted_residual_setting, 0.3, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting,
+        kwargs.pop("dropout", 0.3),
+        last_channel,
+        weights,
+        progress,
+        **kwargs,
+    )

 @register_model()

@@ -871,7 +896,14 @@ def efficientnet_b4(
     weights = EfficientNet_B4_Weights.verify(weights)

     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b4", width_mult=1.4, depth_mult=1.8)
-    return _efficientnet(inverted_residual_setting, 0.4, last_channel, weights, progress, **kwargs)
+    return _efficientnet(
+        inverted_residual_setting,
+        kwargs.pop("dropout", 0.4),
+        last_channel,
+        weights,
+        progress,
+        **kwargs,
+    )

 @register_model()

@@ -902,7 +934,7 @@ def efficientnet_b5(
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b5", width_mult=1.6, depth_mult=2.2)
     return _efficientnet(
         inverted_residual_setting,
-        0.4,
+        kwargs.pop("dropout", 0.4),
         last_channel,
         weights,
         progress,

@@ -939,7 +971,7 @@ def efficientnet_b6(
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b6", width_mult=1.8, depth_mult=2.6)
     return _efficientnet(
         inverted_residual_setting,
-        0.5,
+        kwargs.pop("dropout", 0.5),
         last_channel,
         weights,
         progress,

@@ -976,7 +1008,7 @@ def efficientnet_b7(
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_b7", width_mult=2.0, depth_mult=3.1)
     return _efficientnet(
         inverted_residual_setting,
-        0.5,
+        kwargs.pop("dropout", 0.5),
         last_channel,
         weights,
         progress,

@@ -1014,7 +1046,7 @@ def efficientnet_v2_s(
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_s")
     return _efficientnet(
         inverted_residual_setting,
-        0.2,
+        kwargs.pop("dropout", 0.2),
         last_channel,
         weights,
         progress,

@@ -1052,7 +1084,7 @@ def efficientnet_v2_m(
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_m")
     return _efficientnet(
         inverted_residual_setting,
-        0.3,
+        kwargs.pop("dropout", 0.3),
         last_channel,
         weights,
         progress,

@@ -1090,28 +1122,10 @@ def efficientnet_v2_l(
     inverted_residual_setting, last_channel = _efficientnet_conf("efficientnet_v2_l")
     return _efficientnet(
         inverted_residual_setting,
-        0.4,
+        kwargs.pop("dropout", 0.4),
         last_channel,
         weights,
         progress,
         norm_layer=partial(nn.BatchNorm2d, eps=1e-03),
         **kwargs,
     )
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from ._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "efficientnet_b0": EfficientNet_B0_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b1": EfficientNet_B1_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b2": EfficientNet_B2_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b3": EfficientNet_B3_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b4": EfficientNet_B4_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b5": EfficientNet_B5_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b6": EfficientNet_B6_Weights.IMAGENET1K_V1.url,
-        "efficientnet_b7": EfficientNet_B7_Weights.IMAGENET1K_V1.url,
-    }
-)
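The `kwargs.pop("dropout", ...)` pattern turns the previously hard-coded dropout probabilities into overridable defaults. A hedged usage sketch (assuming torchvision >= 0.16):

    from torchvision.models import efficientnet_b0

    # Before this change the builder always passed its own dropout value positionally,
    # so a user-supplied dropout= kwarg conflicted with it; now the user-supplied value wins.
    model = efficientnet_b0(weights=None, dropout=0.5)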
torchvision/models/feature_extraction.py

@@ -18,7 +18,7 @@ __all__ = ["create_feature_extractor", "get_graph_node_names"]
 class LeafModuleAwareTracer(fx.Tracer):
     """
-    An fx.Tracer that allows the user to specify a set of leaf modules, ie.
+    An fx.Tracer that allows the user to specify a set of leaf modules, i.e.
     modules that are not to be traced through. The resulting graph ends up
     having single nodes referencing calls to the leaf modules' forward methods.
     """

@@ -103,7 +103,7 @@ class NodePathTracer(LeafModuleAwareTracer):
         if node.op != "call_module":
             # In this case module_qualname from torch.fx doesn't go all the
-            # way to the leaf function/op so we need to append it
+            # way to the leaf function/op, so we need to append it
             if len(node_qualname) > 0:
                 # Only append '.' if we are deeper than the top level module
                 node_qualname += "."

@@ -136,7 +136,7 @@ class NodePathTracer(LeafModuleAwareTracer):
 def _is_subseq(x, y):
-    """Check if y is a subseqence of x
+    """Check if y is a subsequence of x
     https://stackoverflow.com/a/24017747/4391249
     """
     iter_x = iter(x)

@@ -228,7 +228,7 @@ def get_graph_node_names(
         tracer_kwargs (dict, optional): a dictionary of keyword arguments for
             ``NodePathTracer`` (they are eventually passed onto
             `torch.fx.Tracer <https://pytorch.org/docs/stable/fx.html#torch.fx.Tracer>`_).
-            By default it will be set to wrap and make leaf nodes all torchvision ops:
+            By default, it will be set to wrap and make leaf nodes all torchvision ops:
             {"autowrap_modules": (math, torchvision.ops,),"leaf_modules": _get_leaf_modules_for_ops(),}
             WARNING: In case the user provides tracer_kwargs, above default arguments will be appended to the user
             provided dictionary.

@@ -391,7 +391,7 @@ def create_feature_extractor(
         tracer_kwargs (dict, optional): a dictionary of keyword arguments for
             ``NodePathTracer`` (which passes them onto it's parent class
             `torch.fx.Tracer <https://pytorch.org/docs/stable/fx.html#torch.fx.Tracer>`_).
-            By default it will be set to wrap and make leaf nodes all torchvision ops:
+            By default, it will be set to wrap and make leaf nodes all torchvision ops:
             {"autowrap_modules": (math, torchvision.ops,),"leaf_modules": _get_leaf_modules_for_ops(),}
             WARNING: In case the user provides tracer_kwargs, above default arguments will be appended to the user
             provided dictionary.

@@ -544,7 +544,7 @@ def create_feature_extractor(
         graph_module.graph.eliminate_dead_code()
         graph_module.recompile()

-        # Keep track of the tracer and graph so we can choose the main one
+        # Keep track of the tracer and graph, so we can choose the main one
         tracers[mode] = tracer
         graphs[mode] = graph
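For context, the docstrings being touched describe `create_feature_extractor` / `get_graph_node_names`, whose default `tracer_kwargs` are merged with anything the caller provides. A minimal, illustrative call (not part of the diff):

    import torchvision
    from torchvision.models.feature_extraction import create_feature_extractor

    model = torchvision.models.resnet18(weights=None)
    extractor = create_feature_extractor(model, return_nodes={"layer4": "features"})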
torchvision/models/googlenet.py

@@ -290,6 +290,8 @@ class GoogLeNet_Weights(WeightsEnum):
                 "acc@5": 89.530,
             }
         },
+        "_ops": 1.498,
+        "_file_size": 49.731,
         "_docs": """These weights are ported from the original paper.""",

@@ -330,7 +332,7 @@ def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = True, **kwargs: Any) -> GoogLeNet:
     model = GoogLeNet(**kwargs)
     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
         if not original_aux_logits:
             model.aux_logits = False
             model.aux1 = None  # type: ignore[assignment]

@@ -341,15 +343,3 @@ def googlenet(*, weights: Optional[GoogLeNet_Weights] = None, progress: bool = True, **kwargs: Any) -> GoogLeNet:
         )

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from ._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        # GoogLeNet ported from TensorFlow
-        "googlenet": GoogLeNet_Weights.IMAGENET1K_V1.url,
-    }
-)
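The `_ops` and `_file_size` entries added throughout this commit are plain metadata on each `Weights` record (per the torchvision docs they are reported in GFLOPs and MB, respectively). A sketch of reading them back (assuming torchvision >= 0.16):

    from torchvision.models import GoogLeNet_Weights

    meta = GoogLeNet_Weights.IMAGENET1K_V1.meta
    print(meta["_ops"], meta["_file_size"])  # 1.498, 49.731 per this diff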
torchvision/models/inception.py

@@ -48,7 +48,7 @@ class Inception3(nn.Module):
             )
             init_weights = True
         if len(inception_blocks) != 7:
-            raise ValueError(f"lenght of inception_blocks should be 7 instead of {len(inception_blocks)}")
+            raise ValueError(f"length of inception_blocks should be 7 instead of {len(inception_blocks)}")
         conv_block = inception_blocks[0]
         inception_a = inception_blocks[1]
         inception_b = inception_blocks[2]

@@ -422,6 +422,8 @@ class Inception_V3_Weights(WeightsEnum):
                 "acc@5": 93.450,
             }
         },
+        "_ops": 5.713,
+        "_file_size": 103.903,
         "_docs": """These weights are ported from the original paper.""",

@@ -468,21 +470,9 @@ def inception_v3(*, weights: Optional[Inception_V3_Weights] = None, progress: bool = True, **kwargs: Any) -> Inception3:
     model = Inception3(**kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
         if not original_aux_logits:
             model.aux_logits = False
             model.AuxLogits = None

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from ._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        # Inception v3 ported from TensorFlow
-        "inception_v3_google": Inception_V3_Weights.IMAGENET1K_V1.url,
-    }
-)
torchvision/models/maxvit.py

 import math
 from collections import OrderedDict
 from functools import partial
-from typing import Any, Callable, List, Optional, OrderedDict, Sequence, Tuple
+from typing import Any, Callable, List, Optional, Sequence, Tuple

 import numpy as np
 import torch

@@ -300,7 +301,7 @@ class PartitionAttentionLayer(nn.Module):
         self,
         in_channels: int,
         head_dim: int,
-        # partitioning parameteres
+        # partitioning parameters
         partition_size: int,
         partition_type: str,
         # grid size needs to be known at initialization time

@@ -426,7 +427,7 @@ class MaxVitLayer(nn.Module):
     ) -> None:
         super().__init__()

-        layers: OrderedDict[str, Any] = OrderedDict()  # type: ignore
+        layers: OrderedDict = OrderedDict()

         # convolutional layer
         layers["MBconv"] = MBConv(

@@ -762,7 +763,7 @@ def _maxvit(
     )

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model

@@ -785,6 +786,8 @@ class MaxVit_T_Weights(WeightsEnum):
                 "acc@5": 96.722,
             }
         },
+        "_ops": 5.558,
+        "_file_size": 118.769,
         "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""",
torchvision/models/mnasnet.py

@@ -88,14 +88,14 @@ def _round_to_multiple_of(val: float, divisor: int, round_up_bias: float = 0.9) -> int:
 def _get_depths(alpha: float) -> List[int]:
-    """Scales tensor depths as in reference MobileNet code, prefers rouding up
+    """Scales tensor depths as in reference MobileNet code, prefers rounding up
     rather than down."""
     depths = [32, 16, 24, 40, 80, 96, 192, 320]
     return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]


 class MNASNet(torch.nn.Module):
-    """MNASNet, as described in https://arxiv.org/pdf/1807.11626.pdf. This
+    """MNASNet, as described in https://arxiv.org/abs/1807.11626. This
     implements the B1 variant of the model.
     >>> model = MNASNet(1.0, num_classes=1000)
     >>> x = torch.rand(1, 3, 224, 224)

@@ -231,6 +231,8 @@ class MNASNet0_5_Weights(WeightsEnum):
                 "acc@5": 87.490,
             }
         },
+        "_ops": 0.104,
+        "_file_size": 8.591,
         "_docs": """These weights reproduce closely the results of the paper.""",

@@ -251,6 +253,8 @@ class MNASNet0_75_Weights(WeightsEnum):
                 "acc@5": 90.496,
             }
         },
+        "_ops": 0.215,
+        "_file_size": 12.303,
         "_docs": """
             These weights were trained from scratch by using TorchVision's `new training recipe
             <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.

@@ -273,6 +277,8 @@ class MNASNet1_0_Weights(WeightsEnum):
                 "acc@5": 91.510,
             }
         },
+        "_ops": 0.314,
+        "_file_size": 16.915,
         "_docs": """These weights reproduce closely the results of the paper.""",

@@ -293,6 +299,8 @@ class MNASNet1_3_Weights(WeightsEnum):
                 "acc@5": 93.522,
             }
         },
+        "_ops": 0.526,
+        "_file_size": 24.246,
         "_docs": """
             These weights were trained from scratch by using TorchVision's `new training recipe
             <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.

@@ -309,7 +317,7 @@ def _mnasnet(alpha: float, weights: Optional[WeightsEnum], progress: bool, **kwargs: Any) -> MNASNet:
     model = MNASNet(alpha, **kwargs)
     if weights:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
     return model

@@ -319,7 +327,7 @@ def mnasnet0_5(*, weights: Optional[MNASNet0_5_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
     """MNASNet with depth multiplier of 0.5 from
     `MnasNet: Platform-Aware Neural Architecture Search for Mobile
-    <https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
+    <https://arxiv.org/abs/1807.11626>`_ paper.

     Args:
         weights (:class:`~torchvision.models.MNASNet0_5_Weights`, optional): The

@@ -347,7 +355,7 @@ def mnasnet0_75(*, weights: Optional[MNASNet0_75_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
     """MNASNet with depth multiplier of 0.75 from
     `MnasNet: Platform-Aware Neural Architecture Search for Mobile
-    <https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
+    <https://arxiv.org/abs/1807.11626>`_ paper.

     Args:
         weights (:class:`~torchvision.models.MNASNet0_75_Weights`, optional): The

@@ -375,7 +383,7 @@ def mnasnet1_0(*, weights: Optional[MNASNet1_0_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
     """MNASNet with depth multiplier of 1.0 from
     `MnasNet: Platform-Aware Neural Architecture Search for Mobile
-    <https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
+    <https://arxiv.org/abs/1807.11626>`_ paper.

     Args:
         weights (:class:`~torchvision.models.MNASNet1_0_Weights`, optional): The

@@ -403,7 +411,7 @@ def mnasnet1_3(*, weights: Optional[MNASNet1_3_Weights] = None, progress: bool = True, **kwargs: Any) -> MNASNet:
     """MNASNet with depth multiplier of 1.3 from
     `MnasNet: Platform-Aware Neural Architecture Search for Mobile
-    <https://arxiv.org/pdf/1807.11626.pdf>`_ paper.
+    <https://arxiv.org/abs/1807.11626>`_ paper.

     Args:
         weights (:class:`~torchvision.models.MNASNet1_3_Weights`, optional): The
torchvision/models/mobilenetv2.py

@@ -23,7 +23,7 @@ class InvertedResidual(nn.Module):
         super().__init__()
         self.stride = stride
         if stride not in [1, 2]:
-            raise ValueError(f"stride should be 1 or 2 insted of {stride}")
+            raise ValueError(f"stride should be 1 or 2 instead of {stride}")

         if norm_layer is None:
             norm_layer = nn.BatchNorm2d

@@ -194,6 +194,8 @@ class MobileNet_V2_Weights(WeightsEnum):
                 "acc@5": 90.286,
             }
         },
+        "_ops": 0.301,
+        "_file_size": 13.555,
         "_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",

@@ -209,6 +211,8 @@ class MobileNet_V2_Weights(WeightsEnum):
                 "acc@5": 90.822,
             }
         },
+        "_ops": 0.301,
+        "_file_size": 13.598,
         "_docs": """
             These weights improve upon the results of the original paper by using a modified version of TorchVision's
             `new training recipe

@@ -251,17 +255,6 @@ def mobilenet_v2(
     model = MobileNetV2(**kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from ._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "mobilenet_v2": MobileNet_V2_Weights.IMAGENET1K_V1.url,
-    }
-)
torchvision/models/mobilenetv3.py

@@ -282,7 +282,7 @@ def _mobilenet_v3(
     model = MobileNetV3(inverted_residual_setting, last_channel, **kwargs)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model

@@ -307,6 +307,8 @@ class MobileNet_V3_Large_Weights(WeightsEnum):
                 "acc@5": 91.340,
             }
         },
+        "_ops": 0.217,
+        "_file_size": 21.114,
         "_docs": """These weights were trained from scratch by using a simple training recipe.""",

@@ -323,6 +325,8 @@ class MobileNet_V3_Large_Weights(WeightsEnum):
                 "acc@5": 92.566,
             }
         },
+        "_ops": 0.217,
+        "_file_size": 21.107,
         "_docs": """
             These weights improve marginally upon the results of the original paper by using a modified version of
             TorchVision's `new training recipe

@@ -347,6 +351,8 @@ class MobileNet_V3_Small_Weights(WeightsEnum):
                 "acc@5": 87.402,
             }
         },
+        "_ops": 0.057,
+        "_file_size": 9.829,
         "_docs": """
             These weights improve upon the results of the original paper by using a simple training recipe.
         """,

@@ -372,7 +378,7 @@ def mobilenet_v3_large(
             weights are used.
         progress (bool, optional): If True, displays a progress bar of the
             download to stderr. Default is True.
-        **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3``
+        **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3``
             base class. Please refer to the `source code
             <https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py>`_
             for more details about this class.

@@ -403,7 +409,7 @@ def mobilenet_v3_small(
             weights are used.
         progress (bool, optional): If True, displays a progress bar of the
             download to stderr. Default is True.
-        **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3``
+        **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/mobilenetv3.py>`_
            for more details about this class.

@@ -415,15 +421,3 @@ def mobilenet_v3_small(
     inverted_residual_setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_small", **kwargs)
     return _mobilenet_v3(inverted_residual_setting, last_channel, weights, progress, **kwargs)
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from ._utils import _ModelURLs
-
-
-model_urls = _ModelURLs(
-    {
-        "mobilenet_v3_large": MobileNet_V3_Large_Weights.IMAGENET1K_V1.url,
-        "mobilenet_v3_small": MobileNet_V3_Small_Weights.IMAGENET1K_V1.url,
-    }
-)
torchvision/models/optical_flow/raft.py

@@ -35,7 +35,7 @@ class ResidualBlock(nn.Module):
         # But in the RAFT training reference, the BatchNorm2d layers are only activated for the first dataset,
         # and frozen for the rest of the training process (i.e. set as eval()). The bias term is thus still useful
         # for the rest of the datasets. Technically, we could remove the bias for other norm layers like Instance norm
-        # because these aren't frozen, but we don't bother (also, we woudn't be able to load the original weights).
+        # because these aren't frozen, but we don't bother (also, we wouldn't be able to load the original weights).
         self.convnormrelu1 = Conv2dNormActivation(
             in_channels, out_channels, norm_layer=norm_layer, kernel_size=3, stride=stride, bias=True
         )

@@ -318,7 +318,7 @@ class MaskPredictor(nn.Module):
     def __init__(self, *, in_channels, hidden_size, multiplier=0.25):
         super().__init__()
         self.convrelu = Conv2dNormActivation(in_channels, hidden_size, norm_layer=None, kernel_size=3)
-        # 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder
+        # 8 * 8 * 9 because the predicted flow is downsampled by 8, from the downsampling of the initial FeatureEncoder,
         # and we interpolate with all 9 surrounding neighbors. See paper and appendix B.
         self.conv = nn.Conv2d(hidden_size, 8 * 8 * 9, 1, padding=0)

@@ -369,6 +369,19 @@ class CorrBlock(nn.Module):
             raise ValueError(
                 f"Input feature maps should have the same shape, instead got {fmap1.shape} (fmap1.shape) != {fmap2.shape} (fmap2.shape)"
             )

+        # Explaining min_fmap_size below: the fmaps are down-sampled (num_levels - 1) times by a factor of 2.
+        # The last corr_volume most have at least 2 values (hence the 2* factor), otherwise grid_sample() would
+        # produce nans in its output.
+        min_fmap_size = 2 * (2 ** (self.num_levels - 1))
+        if any(fmap_size < min_fmap_size for fmap_size in fmap1.shape[-2:]):
+            raise ValueError(
+                "Feature maps are too small to be down-sampled by the correlation pyramid. "
+                f"H and W of feature maps should be at least {min_fmap_size}; got: {fmap1.shape[-2:]}. "
+                "Remember that input images to the model are downsampled by 8, so that means their "
+                f"dimensions should be at least 8 * {min_fmap_size} = {8 * min_fmap_size}."
+            )
+
         corr_volume = self._compute_corr_volume(fmap1, fmap2)

         batch_size, h, w, num_channels, _, _ = corr_volume.shape  # _, _ = h, w

@@ -430,7 +443,7 @@ class RAFT(nn.Module):
             Its input is ``image1``. As in the original implementation, its output will be split into 2 parts:

             - one part will be used as the actual "context", passed to the recurrent unit of the ``update_block``
-            - one part will be used to initialize the hidden state of the of the recurrent unit of
+            - one part will be used to initialize the hidden state of the recurrent unit of
               the ``update_block``

             These 2 parts are split according to the ``hidden_state_size`` of the ``update_block``, so the output

@@ -474,7 +487,7 @@ class RAFT(nn.Module):
         if (h, w) != image2.shape[-2:]:
             raise ValueError(f"input images should have the same shape, instead got ({h}, {w}) != {image2.shape[-2:]}")
         if not (h % 8 == 0) and (w % 8 == 0):
-            raise ValueError(f"input image H and W should be divisible by 8, insted got {h} (h) and {w} (w)")
+            raise ValueError(f"input image H and W should be divisible by 8, instead got {h} (h) and {w} (w)")

         fmaps = self.feature_encoder(torch.cat([image1, image2], dim=0))
         fmap1, fmap2 = torch.chunk(fmaps, chunks=2, dim=0)

@@ -552,6 +565,8 @@ class Raft_Large_Weights(WeightsEnum):
                 "Sintel-Train-Finalpass": {"epe": 2.7894},
                 "Kitti-Train": {"per_image_epe": 5.0172, "fl_all": 17.4506},
             },
+            "_ops": 211.007,
+            "_file_size": 20.129,
             "_docs": """These weights were ported from the original paper. They
                 are trained on :class:`~torchvision.datasets.FlyingChairs` +
                 :class:`~torchvision.datasets.FlyingThings3D`.""",

@@ -570,6 +585,8 @@ class Raft_Large_Weights(WeightsEnum):
                 "Sintel-Train-Finalpass": {"epe": 2.7161},
                 "Kitti-Train": {"per_image_epe": 4.5118, "fl_all": 16.0679},
             },
+            "_ops": 211.007,
+            "_file_size": 20.129,
             "_docs": """These weights were trained from scratch on
                 :class:`~torchvision.datasets.FlyingChairs` +
                 :class:`~torchvision.datasets.FlyingThings3D`.""",

@@ -588,6 +605,8 @@ class Raft_Large_Weights(WeightsEnum):
                 "Sintel-Test-Cleanpass": {"epe": 1.94},
                 "Sintel-Test-Finalpass": {"epe": 3.18},
             },
+            "_ops": 211.007,
+            "_file_size": 20.129,
             "_docs": """
                 These weights were ported from the original paper. They are
                 trained on :class:`~torchvision.datasets.FlyingChairs` +

@@ -612,6 +631,8 @@ class Raft_Large_Weights(WeightsEnum):
                 "Sintel-Test-Cleanpass": {"epe": 1.819},
                 "Sintel-Test-Finalpass": {"epe": 3.067},
             },
+            "_ops": 211.007,
+            "_file_size": 20.129,
             "_docs": """
                 These weights were trained from scratch. They are
                 pre-trained on :class:`~torchvision.datasets.FlyingChairs` +

@@ -636,6 +657,8 @@ class Raft_Large_Weights(WeightsEnum):
             "_metrics": {
                 "Kitti-Test": {"fl_all": 5.10},
             },
+            "_ops": 211.007,
+            "_file_size": 20.129,
             "_docs": """
                 These weights were ported from the original paper. They are
                 pre-trained on :class:`~torchvision.datasets.FlyingChairs` +

@@ -657,6 +680,8 @@ class Raft_Large_Weights(WeightsEnum):
             "_metrics": {
                 "Kitti-Test": {"fl_all": 5.19},
             },
+            "_ops": 211.007,
+            "_file_size": 20.129,
             "_docs": """
                 These weights were trained from scratch. They are
                 pre-trained on :class:`~torchvision.datasets.FlyingChairs` +

@@ -698,6 +723,8 @@ class Raft_Small_Weights(WeightsEnum):
                 "Sintel-Train-Finalpass": {"epe": 3.2790},
                 "Kitti-Train": {"per_image_epe": 7.6557, "fl_all": 25.2801},
             },
+            "_ops": 47.655,
+            "_file_size": 3.821,
             "_docs": """These weights were ported from the original paper. They
                 are trained on :class:`~torchvision.datasets.FlyingChairs` +
                 :class:`~torchvision.datasets.FlyingThings3D`.""",

@@ -715,6 +742,8 @@ class Raft_Small_Weights(WeightsEnum):
                 "Sintel-Train-Finalpass": {"epe": 3.2831},
                 "Kitti-Train": {"per_image_epe": 7.5978, "fl_all": 25.2369},
             },
+            "_ops": 47.655,
+            "_file_size": 3.821,
             "_docs": """These weights were trained from scratch on
                 :class:`~torchvision.datasets.FlyingChairs` +
                 :class:`~torchvision.datasets.FlyingThings3D`.""",

@@ -802,7 +831,7 @@ def _raft(
     )

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
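Worked example of the new `CorrBlock` guard, assuming the default `num_levels = 4`: the feature maps are halved (num_levels - 1) = 3 times, and the coarsest level must keep at least 2 values per dimension.

    num_levels = 4
    min_fmap_size = 2 * (2 ** (num_levels - 1))  # 16
    min_image_size = 8 * min_fmap_size           # 128, since the feature encoder downsamples inputs by 8
    print(min_fmap_size, min_image_size)         # 16 128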
torchvision/models/quantization/googlenet.py

@@ -108,7 +108,7 @@ class QuantizableGoogLeNet(GoogLeNet):
 class GoogLeNet_QuantizedWeights(WeightsEnum):
     IMAGENET1K_FBGEMM_V1 = Weights(
-        url="https://download.pytorch.org/models/quantized/googlenet_fbgemm-c00238cf.pth",
+        url="https://download.pytorch.org/models/quantized/googlenet_fbgemm-c81f6644.pth",
         transforms=partial(ImageClassification, crop_size=224),
         meta={
             "num_params": 6624904,

@@ -123,6 +123,8 @@ class GoogLeNet_QuantizedWeights(WeightsEnum):
                 "acc@5": 89.404,
             }
         },
+        "_ops": 1.498,
+        "_file_size": 12.618,
         "_docs": """
             These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
             weights listed below.

@@ -195,7 +197,7 @@ def googlenet(
         quantize_model(model, backend)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
         if not original_aux_logits:
             model.aux_logits = False
             model.aux1 = None  # type: ignore[assignment]

@@ -206,16 +208,3 @@ def googlenet(
         )

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-from ..googlenet import model_urls  # noqa: F401
-
-
-quant_model_urls = _ModelURLs(
-    {
-        # fp32 GoogLeNet ported from TensorFlow, with weights quantized in PyTorch
-        "googlenet_fbgemm": GoogLeNet_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
-    }
-)
torchvision/models/quantization/inception.py

@@ -168,7 +168,7 @@ class QuantizableInception3(inception_module.Inception3):
 class Inception_V3_QuantizedWeights(WeightsEnum):
     IMAGENET1K_FBGEMM_V1 = Weights(
-        url="https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-71447a44.pth",
+        url="https://download.pytorch.org/models/quantized/inception_v3_google_fbgemm-a2837893.pth",
         transforms=partial(ImageClassification, crop_size=299, resize_size=342),
         meta={
             "num_params": 27161264,

@@ -183,6 +183,8 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
                 "acc@5": 93.354,
             }
         },
+        "_ops": 5.713,
+        "_file_size": 23.146,
         "_docs": """
             These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
             weights listed below.

@@ -263,22 +265,9 @@ def inception_v3(
         if quantize and not original_aux_logits:
             model.aux_logits = False
             model.AuxLogits = None
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))
         if not quantize and not original_aux_logits:
             model.aux_logits = False
             model.AuxLogits = None

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-from ..inception import model_urls  # noqa: F401
-
-
-quant_model_urls = _ModelURLs(
-    {
-        # fp32 weights ported from TensorFlow, quantized in PyTorch
-        "inception_v3_google_fbgemm": Inception_V3_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
-    }
-)
torchvision/models/quantization/mobilenetv2.py

@@ -80,6 +80,8 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
                 "acc@5": 90.150,
             }
         },
+        "_ops": 0.301,
+        "_file_size": 3.423,
         "_docs": """
             These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
             weights listed below.

@@ -147,18 +149,6 @@ def mobilenet_v2(
         quantize_model(model, backend)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-from ..mobilenetv2 import model_urls  # noqa: F401
-
-
-quant_model_urls = _ModelURLs(
-    {
-        "mobilenet_v2_qnnpack": MobileNet_V2_QuantizedWeights.IMAGENET1K_QNNPACK_V1.url,
-    }
-)
torchvision/models/quantization/mobilenetv3.py

@@ -149,7 +149,7 @@ def _mobilenet_v3_model(
         torch.ao.quantization.prepare_qat(model, inplace=True)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     if quantize:
         torch.ao.quantization.convert(model, inplace=True)

@@ -175,6 +175,8 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
                 "acc@5": 90.858,
             }
         },
+        "_ops": 0.217,
+        "_file_size": 21.554,
         "_docs": """
             These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
             weights listed below.

@@ -233,15 +235,3 @@ def mobilenet_v3_large(
     inverted_residual_setting, last_channel = _mobilenet_v3_conf("mobilenet_v3_large", **kwargs)
     return _mobilenet_v3_model(inverted_residual_setting, last_channel, weights, progress, quantize, **kwargs)
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-from ..mobilenetv3 import model_urls  # noqa: F401
-
-
-quant_model_urls = _ModelURLs(
-    {
-        "mobilenet_v3_large_qnnpack": MobileNet_V3_Large_QuantizedWeights.IMAGENET1K_QNNPACK_V1.url,
-    }
-)
torchvision/models/quantization/resnet.py

@@ -144,7 +144,7 @@ def _resnet(
         quantize_model(model, backend)

     if weights is not None:
-        model.load_state_dict(weights.get_state_dict(progress=progress))
+        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

     return model

@@ -175,6 +175,8 @@ class ResNet18_QuantizedWeights(WeightsEnum):
                 "acc@5": 88.882,
             }
         },
+        "_ops": 1.814,
+        "_file_size": 11.238,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V1

@@ -194,6 +196,8 @@ class ResNet50_QuantizedWeights(WeightsEnum):
                 "acc@5": 92.814,
             }
         },
+        "_ops": 4.089,
+        "_file_size": 24.759,
         },
     )
     IMAGENET1K_FBGEMM_V2 = Weights(

@@ -209,6 +213,8 @@ class ResNet50_QuantizedWeights(WeightsEnum):
                 "acc@5": 94.976,
             }
         },
+        "_ops": 4.089,
+        "_file_size": 24.953,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V2

@@ -228,6 +234,8 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum):
                 "acc@5": 94.480,
             }
         },
+        "_ops": 16.414,
+        "_file_size": 86.034,
         },
     )
     IMAGENET1K_FBGEMM_V2 = Weights(

@@ -243,6 +251,8 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum):
                 "acc@5": 96.132,
             }
         },
+        "_ops": 16.414,
+        "_file_size": 86.645,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V2

@@ -263,6 +273,8 @@ class ResNeXt101_64X4D_QuantizedWeights(WeightsEnum):
                 "acc@5": 96.326,
             }
         },
+        "_ops": 15.46,
+        "_file_size": 81.556,
         },
     )
     DEFAULT = IMAGENET1K_FBGEMM_V1

@@ -470,17 +482,3 @@ def resnext101_64x4d(
     _ovewrite_named_param(kwargs, "groups", 64)
     _ovewrite_named_param(kwargs, "width_per_group", 4)
     return _resnet(QuantizableBottleneck, [3, 4, 23, 3], weights, progress, quantize, **kwargs)
-
-
-# The dictionary below is internal implementation detail and will be removed in v0.15
-from .._utils import _ModelURLs
-from ..resnet import model_urls  # noqa: F401
-
-
-quant_model_urls = _ModelURLs(
-    {
-        "resnet18_fbgemm": ResNet18_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
-        "resnet50_fbgemm": ResNet50_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
-        "resnext101_32x8d_fbgemm": ResNeXt101_32X8D_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url,
-    }
-)
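As in the other quantization modules, the deprecated `quant_model_urls` dict is gone; the URLs live on the quantized weight enums. An equivalent lookup sketch (assuming torchvision >= 0.16):

    from torchvision.models.quantization import ResNet18_QuantizedWeights

    print(ResNet18_QuantizedWeights.IMAGENET1K_FBGEMM_V1.url)  # replaces quant_model_urls["resnet18_fbgemm"]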