Unverified Commit 289fce29 authored by Joao Gomes, committed by GitHub

Replace asserts with exceptions (#5587)



* replace most asserts with exceptions

* fix formatting issues

* fix linting and remove more asserts

* fix regression

* fix regression

* fix bug

* apply ufmt

* apply ufmt

* fix tests

* fix format

* fix None check

* fix detection models tests

* non scriptable any

* add more checks for None values

* fix retinanet test

* fix retinanet test

* Update references/classification/transforms.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update references/classification/transforms.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update references/optical_flow/transforms.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update references/optical_flow/transforms.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update references/optical_flow/transforms.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* make value checks more pythonic

* Update references/optical_flow/transforms.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* make value checks more pythonic

* make more checks pythonic

* fix bug

* apply ufmt

* fix tracing issues

* fix typos

* fix lint

* remove unnecessary f-strings

* fix bug

* Update torchvision/datasets/mnist.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/datasets/mnist.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/ops/boxes.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/ops/poolers.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/utils.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* address PR comments

* Update torchvision/io/_video_opt.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/models/detection/generalized_rcnn.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/models/feature_extraction.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* Update torchvision/models/optical_flow/raft.py
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>

* address PR comments

* addressing further pr comments

* fix bug

* remove unnecessary else

* apply ufmt

* last pr comment

* replace RuntimeErrors
Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
parent 9bbb777d
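The whole diff below applies one pattern: each `assert` becomes an explicit `if` check that raises `TypeError` for a wrong type or `ValueError` for a bad value, with an f-string reporting what was actually received. A minimal sketch of that pattern, using a hypothetical `configure_head` helper rather than code from this diff:

from typing import Optional, Sequence


def configure_head(num_classes: int, anchor_sizes: Optional[Sequence[int]] = None) -> list:
    # Before: assert isinstance(num_classes, int) and num_classes > 0
    if not isinstance(num_classes, int):
        raise TypeError(f"num_classes should be an int instead of {type(num_classes)}")
    if num_classes <= 0:
        raise ValueError(f"num_classes should be positive, instead got {num_classes}")
    # Before: assert anchor_sizes is not None
    if anchor_sizes is None:
        raise ValueError("anchor_sizes should not be None")
    return [num_classes * s for s in anchor_sizes]


print(configure_head(3, anchor_sizes=[32, 64, 128]))  # [96, 192, 384]

Unlike asserts, these checks still run under `python -O`, and they raise exception types that callers can catch selectively.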
@@ -187,8 +187,14 @@ class FasterRCNN(GeneralizedRCNN):
                 "same for all the levels)"
             )
-        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
-        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
+        if not isinstance(rpn_anchor_generator, (AnchorGenerator, type(None))):
+            raise TypeError(
+                f"rpn_anchor_generator should be of type AnchorGenerator or None instead of {type(rpn_anchor_generator)}"
+            )
+        if not isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None))):
+            raise TypeError(
+                f"box_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(box_roi_pool)}"
+            )
         if num_classes is not None:
             if box_predictor is not None:
@@ -299,7 +305,10 @@ class FastRCNNPredictor(nn.Module):
     def forward(self, x):
         if x.dim() == 4:
-            assert list(x.shape[2:]) == [1, 1]
+            if list(x.shape[2:]) != [1, 1]:
+                raise ValueError(
+                    f"x has the wrong shape, expecting the last two dimensions to be [1,1] instead of {list(x.shape[2:])}"
+                )
         x = x.flatten(start_dim=1)
         scores = self.cls_score(x)
         bbox_deltas = self.bbox_pred(x)
...
@@ -378,14 +378,20 @@ class FCOS(nn.Module):
             )
         self.backbone = backbone
-        assert isinstance(anchor_generator, (AnchorGenerator, type(None)))
+        if not isinstance(anchor_generator, (AnchorGenerator, type(None))):
+            raise TypeError(
+                f"anchor_generator should be of type AnchorGenerator or None, instead got {type(anchor_generator)}"
+            )
         if anchor_generator is None:
             anchor_sizes = ((8,), (16,), (32,), (64,), (128,))  # equal to strides of multi-level feature map
             aspect_ratios = ((1.0,),) * len(anchor_sizes)  # set only one anchor
             anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
         self.anchor_generator = anchor_generator
-        assert self.anchor_generator.num_anchors_per_location()[0] == 1
+        if self.anchor_generator.num_anchors_per_location()[0] != 1:
+            raise ValueError(
+                f"anchor_generator.num_anchors_per_location()[0] should be 1 instead of {anchor_generator.num_anchors_per_location()[0]}"
+            )
         if head is None:
             head = FCOSHead(backbone.out_channels, anchor_generator.num_anchors_per_location()[0], num_classes)
@@ -560,12 +566,15 @@ class FCOS(nn.Module):
                     if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                         raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.")
                 else:
-                    raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
+                    raise TypeError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
         original_image_sizes: List[Tuple[int, int]] = []
         for img in images:
             val = img.shape[-2:]
-            assert len(val) == 2
+            if len(val) != 2:
+                raise ValueError(
+                    f"expecting the last two dimensions of the Tensor to be H and W instead got {img.shape[-2:]}"
+                )
             original_image_sizes.append((val[0], val[1]))
         # transform the input
@@ -603,9 +612,10 @@ class FCOS(nn.Module):
         losses = {}
         detections: List[Dict[str, Tensor]] = []
         if self.training:
-            assert targets is not None
             # compute the losses
+            if targets is None:
+                raise ValueError("targets should not be None when in training mode")
             losses = self.compute_loss(targets, head_outputs, anchors, num_anchors_per_level)
         else:
             # split outputs per level
...
@@ -57,22 +57,25 @@ class GeneralizedRCNN(nn.Module):
                 like `scores`, `labels` and `mask` (for Mask R-CNN models).
         """
-        if self.training and targets is None:
-            raise ValueError("In training mode, targets should be passed")
         if self.training:
-            assert targets is not None
+            if targets is None:
+                raise ValueError("In training mode, targets should be passed")
             for target in targets:
                 boxes = target["boxes"]
                 if isinstance(boxes, torch.Tensor):
                     if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                         raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.")
                 else:
-                    raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
+                    raise TypeError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
         original_image_sizes: List[Tuple[int, int]] = []
         for img in images:
             val = img.shape[-2:]
-            assert len(val) == 2
+            if len(val) != 2:
+                raise ValueError(
+                    f"Expecting the last two dimensions of the input tensor to be H and W, instead got {img.shape[-2:]}"
+                )
             original_image_sizes.append((val[0], val[1]))
         images, targets = self.transform(images, targets)
...
@@ -191,7 +191,10 @@ class KeypointRCNN(FasterRCNN):
         num_keypoints=None,
     ):
-        assert isinstance(keypoint_roi_pool, (MultiScaleRoIAlign, type(None)))
+        if not isinstance(keypoint_roi_pool, (MultiScaleRoIAlign, type(None))):
+            raise TypeError(
+                f"keypoint_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(keypoint_roi_pool)}"
+            )
         if min_size is None:
             min_size = (640, 672, 704, 736, 768, 800)
...
@@ -191,7 +191,10 @@ class MaskRCNN(FasterRCNN):
         mask_predictor=None,
     ):
-        assert isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None)))
+        if not isinstance(mask_roi_pool, (MultiScaleRoIAlign, type(None))):
+            raise TypeError(
+                f"mask_roi_pool should be of type MultiScaleRoIAlign or None instead of {type(mask_roi_pool)}"
+            )
         if num_classes is not None:
             if mask_predictor is not None:
...
@@ -347,7 +347,10 @@ class RetinaNet(nn.Module):
             )
         self.backbone = backbone
-        assert isinstance(anchor_generator, (AnchorGenerator, type(None)))
+        if not isinstance(anchor_generator, (AnchorGenerator, type(None))):
+            raise TypeError(
+                f"anchor_generator should be of type AnchorGenerator or None instead of {type(anchor_generator)}"
+            )
         if anchor_generator is None:
             anchor_sizes = tuple((x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3))) for x in [32, 64, 128, 256, 512])
@@ -488,20 +491,24 @@
             raise ValueError("In training mode, targets should be passed")
         if self.training:
-            assert targets is not None
+            if targets is None:
+                raise ValueError("In training mode, targets should be passed")
             for target in targets:
                 boxes = target["boxes"]
                 if isinstance(boxes, torch.Tensor):
                     if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                         raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.")
                 else:
-                    raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
+                    raise TypeError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
         # get the original image sizes
         original_image_sizes: List[Tuple[int, int]] = []
         for img in images:
             val = img.shape[-2:]
-            assert len(val) == 2
+            if len(val) != 2:
+                raise ValueError(
+                    f"Expecting the two last elements of the input tensors to be H and W instead got {img.shape[-2:]}"
+                )
             original_image_sizes.append((val[0], val[1]))
         # transform the input
@@ -539,8 +546,8 @@
         losses = {}
         detections: List[Dict[str, Tensor]] = []
         if self.training:
-            assert targets is not None
+            if targets is None:
+                raise ValueError("In training mode, targets should be passed")
             # compute the losses
             losses = self.compute_loss(targets, head_outputs, anchors)
         else:
...
@@ -299,7 +299,10 @@ def heatmaps_to_keypoints(maps, rois):
 def keypointrcnn_loss(keypoint_logits, proposals, gt_keypoints, keypoint_matched_idxs):
     # type: (Tensor, List[Tensor], List[Tensor], List[Tensor]) -> Tensor
     N, K, H, W = keypoint_logits.shape
-    assert H == W
+    if H != W:
+        raise ValueError(
+            f"keypoint_logits height and width (last two elements of shape) should be equal. Instead got H = {H} and W = {W}"
+        )
     discretization_size = H
     heatmaps = []
     valid = []
@@ -615,11 +618,15 @@ class RoIHeads(nn.Module):
     def check_targets(self, targets):
         # type: (Optional[List[Dict[str, Tensor]]]) -> None
-        assert targets is not None
-        assert all(["boxes" in t for t in targets])
-        assert all(["labels" in t for t in targets])
+        if targets is None:
+            raise ValueError("targets should not be None")
+        if not all(["boxes" in t for t in targets]):
+            raise ValueError("Every element of targets should have a boxes key")
+        if not all(["labels" in t for t in targets]):
+            raise ValueError("Every element of targets should have a labels key")
         if self.has_mask():
-            assert all(["masks" in t for t in targets])
+            if not all(["masks" in t for t in targets]):
+                raise ValueError("Every element of targets should have a masks key")
     def select_training_samples(
         self,
@@ -628,7 +635,8 @@
     ):
         # type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]
         self.check_targets(targets)
-        assert targets is not None
+        if targets is None:
+            raise ValueError("targets should not be None")
         dtype = proposals[0].dtype
         device = proposals[0].device
@@ -736,10 +744,13 @@
             for t in targets:
                 # TODO: https://github.com/pytorch/pytorch/issues/26731
                 floating_point_types = (torch.float, torch.double, torch.half)
-                assert t["boxes"].dtype in floating_point_types, "target boxes must of float type"
-                assert t["labels"].dtype == torch.int64, "target labels must of int64 type"
+                if not t["boxes"].dtype in floating_point_types:
+                    raise TypeError(f"target boxes must be of float type, instead got {t['boxes'].dtype}")
+                if not t["labels"].dtype == torch.int64:
+                    raise TypeError(f"target labels must be of int64 type, instead got {t['labels'].dtype}")
                 if self.has_keypoint():
-                    assert t["keypoints"].dtype == torch.float32, "target keypoints must of float type"
+                    if not t["keypoints"].dtype == torch.float32:
+                        raise TypeError(f"target keypoints must be of float type, instead got {t['keypoints'].dtype}")
         if self.training:
             proposals, matched_idxs, labels, regression_targets = self.select_training_samples(proposals, targets)
@@ -755,7 +766,10 @@
         result: List[Dict[str, torch.Tensor]] = []
         losses = {}
         if self.training:
-            assert labels is not None and regression_targets is not None
+            if labels is None:
+                raise ValueError("labels cannot be None")
+            if regression_targets is None:
+                raise ValueError("regression_targets cannot be None")
             loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
             losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
         else:
@@ -773,7 +787,9 @@
         if self.has_mask():
             mask_proposals = [p["boxes"] for p in result]
             if self.training:
-                assert matched_idxs is not None
+                if matched_idxs is None:
+                    raise ValueError("if in training, matched_idxs should not be None")
                 # during training, only focus on positive boxes
                 num_images = len(proposals)
                 mask_proposals = []
@@ -794,9 +810,8 @@
             loss_mask = {}
             if self.training:
-                assert targets is not None
-                assert pos_matched_idxs is not None
-                assert mask_logits is not None
+                if targets is None or pos_matched_idxs is None or mask_logits is None:
+                    raise ValueError("targets, pos_matched_idxs, mask_logits cannot be None when training")
                 gt_masks = [t["masks"] for t in targets]
                 gt_labels = [t["labels"] for t in targets]
@@ -823,7 +838,9 @@
                 num_images = len(proposals)
                 keypoint_proposals = []
                 pos_matched_idxs = []
-                assert matched_idxs is not None
+                if matched_idxs is None:
+                    raise ValueError("if in training, matched_idxs should not be None")
                 for img_id in range(num_images):
                     pos = torch.where(labels[img_id] > 0)[0]
                     keypoint_proposals.append(proposals[img_id][pos])
@@ -837,8 +854,8 @@
             loss_keypoint = {}
             if self.training:
-                assert targets is not None
-                assert pos_matched_idxs is not None
+                if targets is None or pos_matched_idxs is None:
+                    raise ValueError("both targets and pos_matched_idxs should not be None when in training mode")
                 gt_keypoints = [t["keypoints"] for t in targets]
                 rcnn_loss_keypoint = keypointrcnn_loss(
@@ -846,14 +863,15 @@
                 )
                 loss_keypoint = {"loss_keypoint": rcnn_loss_keypoint}
             else:
-                assert keypoint_logits is not None
-                assert keypoint_proposals is not None
+                if keypoint_logits is None or keypoint_proposals is None:
+                    raise ValueError(
+                        "both keypoint_logits and keypoint_proposals should not be None when not in training mode"
+                    )
                 keypoints_probs, kp_scores = keypointrcnn_inference(keypoint_logits, keypoint_proposals)
                 for keypoint_prob, kps, r in zip(keypoints_probs, kp_scores, result):
                     r["keypoints"] = keypoint_prob
                     r["keypoints_scores"] = kps
             losses.update(loss_keypoint)
         return result, losses
@@ -339,7 +339,8 @@ class RegionProposalNetwork(torch.nn.Module):
         losses = {}
         if self.training:
-            assert targets is not None
+            if targets is None:
+                raise ValueError("targets should not be None")
             labels, matched_gt_boxes = self.assign_targets_to_anchors(anchors, targets)
             regression_targets = self.box_coder.encode(matched_gt_boxes, anchors)
             loss_objectness, loss_rpn_box_reg = self.compute_loss(
...
@@ -196,7 +196,10 @@ class SSD(nn.Module):
            else:
                out_channels = det_utils.retrieve_out_channels(backbone, size)
-            assert len(out_channels) == len(anchor_generator.aspect_ratios)
+            if len(out_channels) != len(anchor_generator.aspect_ratios):
+                raise ValueError(
+                    f"The length of the output channels from the backbone ({len(out_channels)}) do not match the length of the anchor generator aspect ratios ({len(anchor_generator.aspect_ratios)})"
+                )
            num_anchors = self.anchor_generator.num_anchors_per_location()
            head = SSDHead(out_channels, num_anchors, num_classes)
@@ -308,20 +311,24 @@
            raise ValueError("In training mode, targets should be passed")
        if self.training:
-            assert targets is not None
+            if targets is None:
+                raise ValueError("targets should not be None")
            for target in targets:
                boxes = target["boxes"]
                if isinstance(boxes, torch.Tensor):
                    if len(boxes.shape) != 2 or boxes.shape[-1] != 4:
                        raise ValueError(f"Expected target boxes to be a tensor of shape [N, 4], got {boxes.shape}.")
                else:
-                    raise ValueError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
+                    raise TypeError(f"Expected target boxes to be of type Tensor, got {type(boxes)}.")
        # get the original image sizes
        original_image_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
-            assert len(val) == 2
+            if len(val) != 2:
+                raise ValueError(
+                    f"The last two dimensions of the input tensors should contain H and W, instead got {img.shape[-2:]}"
+                )
            original_image_sizes.append((val[0], val[1]))
        # transform the input
@@ -356,7 +363,8 @@
        losses = {}
        detections: List[Dict[str, Tensor]] = []
        if self.training:
-            assert targets is not None
+            if targets is None:
+                raise ValueError("targets should not be None when in training mode")
            matched_idxs = []
            for anchors_per_image, targets_per_image in zip(anchors, targets):
@@ -527,7 +535,8 @@ def _vgg_extractor(backbone: vgg.VGG, highres: bool, trainable_layers: int):
    num_stages = len(stage_indices)
    # find the index of the layer from which we wont freeze
-    assert 0 <= trainable_layers <= num_stages
+    if not 0 <= trainable_layers <= num_stages:
+        raise ValueError(f"trainable_layers should be in the range [0, {num_stages}]. Instead got {trainable_layers}")
    freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
    for b in backbone[:freeze_before]:
...
@@ -122,7 +122,9 @@ class SSDLiteFeatureExtractorMobileNet(nn.Module):
        super().__init__()
        _log_api_usage_once(self)
-        assert not backbone[c4_pos].use_res_connect
+        if backbone[c4_pos].use_res_connect:
+            raise ValueError("backbone[c4_pos].use_res_connect should be False")
        self.features = nn.Sequential(
            # As described in section 6.3 of MobileNetV3 paper
            nn.Sequential(*backbone[:c4_pos], backbone[c4_pos].block[0]),  # from start until C4 expansion layer
@@ -168,7 +170,8 @@ def _mobilenet_extractor(
    num_stages = len(stage_indices)
    # find the index of the layer from which we wont freeze
-    assert 0 <= trainable_layers <= num_stages
+    if not 0 <= trainable_layers <= num_stages:
+        raise ValueError(f"trainable_layers should be in the range [0, {num_stages}], instead got {trainable_layers}")
    freeze_before = len(backbone) if trainable_layers == 0 else stage_indices[num_stages - trainable_layers]
    for b in backbone[:freeze_before]:
@@ -244,7 +247,10 @@ def ssdlite320_mobilenet_v3_large(
        anchor_generator = DefaultBoxGenerator([[2, 3] for _ in range(6)], min_ratio=0.2, max_ratio=0.95)
        out_channels = det_utils.retrieve_out_channels(backbone, size)
        num_anchors = anchor_generator.num_anchors_per_location()
-        assert len(out_channels) == len(anchor_generator.aspect_ratios)
+        if len(out_channels) != len(anchor_generator.aspect_ratios):
+            raise ValueError(
+                f"The length of the output channels from the backbone {len(out_channels)} do not match the length of the anchor generator aspect ratios {len(anchor_generator.aspect_ratios)}"
+            )
    defaults = {
        "score_thresh": 0.001,
...
@@ -134,7 +134,10 @@ class GeneralizedRCNNTransform(nn.Module):
        images = self.batch_images(images, size_divisible=self.size_divisible)
        image_sizes_list: List[Tuple[int, int]] = []
        for image_size in image_sizes:
-            assert len(image_size) == 2
+            if len(image_size) != 2:
+                raise ValueError(
+                    f"Input tensors expected to have in the last two elements H and W, instead got {image_size}"
+                )
            image_sizes_list.append((image_size[0], image_size[1]))
        image_list = ImageList(images, image_sizes_list)
...
@@ -277,7 +277,8 @@ class DualGraphModule(fx.GraphModule):
        # eval graphs)
        for node in chain(iter(train_graph.nodes), iter(eval_graph.nodes)):
            if node.op in ["get_attr", "call_module"]:
-                assert isinstance(node.target, str)
+                if not isinstance(node.target, str):
+                    raise TypeError(f"node.target should be of type str instead of {type(node.target)}")
                _copy_attr(root, self, node.target)
        # train mode by default
@@ -290,9 +291,10 @@ class DualGraphModule(fx.GraphModule):
        # Locally defined Tracers are not pickleable. This is needed because torch.package will
        # serialize a GraphModule without retaining the Graph, and needs to use the correct Tracer
        # to re-create the Graph during deserialization.
-        assert (
-            self.eval_graph._tracer_cls == self.train_graph._tracer_cls
-        ), "Train mode and eval mode should use the same tracer class"
+        if self.eval_graph._tracer_cls != self.train_graph._tracer_cls:
+            raise TypeError(
+                f"Train mode and eval mode should use the same tracer class. Instead got {self.eval_graph._tracer_cls} for eval vs {self.train_graph._tracer_cls} for train"
+            )
        self._tracer_cls = None
        if self.graph._tracer_cls and "<locals>" not in self.graph._tracer_cls.__qualname__:
            self._tracer_cls = self.graph._tracer_cls
@@ -431,17 +433,19 @@ def create_feature_extractor(
    }
    is_training = model.training
-    assert any(
-        arg is not None for arg in [return_nodes, train_return_nodes, eval_return_nodes]
-    ), "Either `return_nodes` or `train_return_nodes` and `eval_return_nodes` together, should be specified"
-    assert not (
-        (train_return_nodes is None) ^ (eval_return_nodes is None)
-    ), "If any of `train_return_nodes` and `eval_return_nodes` are specified, then both should be specified"
-    assert (return_nodes is None) ^ (
-        train_return_nodes is None
-    ), "If `train_return_nodes` and `eval_return_nodes` are specified, then both should be specified"
+    if all(arg is None for arg in [return_nodes, train_return_nodes, eval_return_nodes]):
+        raise ValueError(
+            "Either `return_nodes` or `train_return_nodes` and `eval_return_nodes` together, should be specified"
+        )
+    if (train_return_nodes is None) ^ (eval_return_nodes is None):
+        raise ValueError(
+            "If any of `train_return_nodes` and `eval_return_nodes` are specified, then both should be specified"
+        )
+    if not ((return_nodes is None) ^ (train_return_nodes is None)):
+        raise ValueError("If `train_return_nodes` and `eval_return_nodes` are specified, then both should be specified")
    # Put *_return_nodes into Dict[str, str] format
    def to_strdict(n) -> Dict[str, str]:
@@ -476,9 +480,10 @@ def create_feature_extractor(
        available_nodes = list(tracer.node_to_qualname.values())
        # FIXME We don't know if we should expect this to happen
-        assert len(set(available_nodes)) == len(
-            available_nodes
-        ), "There are duplicate nodes! Please raise an issue https://github.com/pytorch/vision/issues"
+        if len(set(available_nodes)) != len(available_nodes):
+            raise ValueError(
+                "There are duplicate nodes! Please raise an issue https://github.com/pytorch/vision/issues"
+            )
        # Check that all outputs in return_nodes are present in the model
        for query in mode_return_nodes[mode].keys():
            # To check if a query is available we need to check that at least
@@ -497,7 +502,9 @@ def create_feature_extractor(
        for n in reversed(graph_module.graph.nodes):
            if n.op == "output":
                orig_output_nodes.append(n)
-        assert len(orig_output_nodes)
+        if not orig_output_nodes:
+            raise ValueError("No output nodes found in graph_module.graph.nodes")
        for n in orig_output_nodes:
            graph_module.graph.erase_node(n)
...
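The `create_feature_extractor` checks above enforce that callers pass either `return_nodes` on its own or `train_return_nodes` together with `eval_return_nodes`. A short usage sketch of how those rules look from the caller's side (the resnet18 model and node names here are illustrative, not taken from this diff):

import torchvision
from torchvision.models.feature_extraction import create_feature_extractor

model = torchvision.models.resnet18()

# Valid: `return_nodes` alone.
extractor = create_feature_extractor(model, return_nodes={"layer2": "feat2", "layer4": "feat4"})

# Also valid: `train_return_nodes` and `eval_return_nodes` specified together.
dual_extractor = create_feature_extractor(
    model,
    train_return_nodes={"layer4": "feat4"},
    eval_return_nodes={"avgpool": "pooled"},
)

# Passing `return_nodes` together with the train/eval pair, or only one of the pair,
# now raises ValueError instead of tripping an assert.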
@@ -50,7 +50,8 @@ class GoogLeNet(nn.Module):
                FutureWarning,
            )
            init_weights = True
-        assert len(blocks) == 3
+        if len(blocks) != 3:
+            raise ValueError(f"blocks length should be 3 instead of {len(blocks)}")
        conv_block = blocks[0]
        inception_block = blocks[1]
        inception_aux_block = blocks[2]
...
@@ -48,7 +48,8 @@ class Inception3(nn.Module):
                FutureWarning,
            )
            init_weights = True
-        assert len(inception_blocks) == 7
+        if len(inception_blocks) != 7:
+            raise ValueError(f"length of inception_blocks should be 7 instead of {len(inception_blocks)}")
        conv_block = inception_blocks[0]
        inception_a = inception_blocks[1]
        inception_b = inception_blocks[2]
...
@@ -27,8 +27,10 @@ class _InvertedResidual(nn.Module):
        self, in_ch: int, out_ch: int, kernel_size: int, stride: int, expansion_factor: int, bn_momentum: float = 0.1
    ) -> None:
        super().__init__()
-        assert stride in [1, 2]
-        assert kernel_size in [3, 5]
+        if stride not in [1, 2]:
+            raise ValueError(f"stride should be 1 or 2 instead of {stride}")
+        if kernel_size not in [3, 5]:
+            raise ValueError(f"kernel_size should be 3 or 5 instead of {kernel_size}")
        mid_ch = in_ch * expansion_factor
        self.apply_residual = in_ch == out_ch and stride == 1
        self.layers = nn.Sequential(
@@ -56,7 +58,8 @@ def _stack(
    in_ch: int, out_ch: int, kernel_size: int, stride: int, exp_factor: int, repeats: int, bn_momentum: float
) -> nn.Sequential:
    """Creates a stack of inverted residuals."""
-    assert repeats >= 1
+    if repeats < 1:
+        raise ValueError(f"repeats should be >= 1, instead got {repeats}")
    # First one has no skip, because feature map size changes.
    first = _InvertedResidual(in_ch, out_ch, kernel_size, stride, exp_factor, bn_momentum=bn_momentum)
    remaining = []
@@ -69,7 +72,8 @@ def _round_to_multiple_of(val: float, divisor: int, round_up_bias: float = 0.9)
    """Asymmetric rounding to make `val` divisible by `divisor`. With default
    bias, will round up, unless the number is no more than 10% greater than the
    smaller divisible value, i.e. (83, 8) -> 80, but (84, 8) -> 88."""
-    assert 0.0 < round_up_bias < 1.0
+    if not 0.0 < round_up_bias < 1.0:
+        raise ValueError(f"round_up_bias should be greater than 0.0 and smaller than 1.0 instead of {round_up_bias}")
    new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
    return new_val if new_val >= round_up_bias * val else new_val + divisor
@@ -99,7 +103,8 @@ class MNASNet(torch.nn.Module):
    def __init__(self, alpha: float, num_classes: int = 1000, dropout: float = 0.2) -> None:
        super().__init__()
        _log_api_usage_once(self)
-        assert alpha > 0.0
+        if alpha <= 0.0:
+            raise ValueError(f"alpha should be greater than 0.0 instead of {alpha}")
        self.alpha = alpha
        self.num_classes = num_classes
        depths = _get_depths(alpha)
@@ -158,7 +163,8 @@ class MNASNet(torch.nn.Module):
        error_msgs: List[str],
    ) -> None:
        version = local_metadata.get("version", None)
-        assert version in [1, 2]
+        if version not in [1, 2]:
+            raise ValueError(f"version should be set to 1 or 2 instead of {version}")
        if version == 1 and not self.alpha == 1.0:
            # In the initial version of the model (v1), stem was fixed-size.
...
@@ -44,7 +44,8 @@ class InvertedResidual(nn.Module):
    ) -> None:
        super().__init__()
        self.stride = stride
-        assert stride in [1, 2]
+        if stride not in [1, 2]:
+            raise ValueError(f"stride should be 1 or 2 instead of {stride}")
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
...
@@ -121,7 +121,8 @@ class FeatureEncoder(nn.Module):
    def __init__(self, *, block=ResidualBlock, layers=(64, 64, 96, 128, 256), norm_layer=nn.BatchNorm2d):
        super().__init__()
-        assert len(layers) == 5
+        if len(layers) != 5:
+            raise ValueError(f"The expected number of layers is 5, instead got {len(layers)}")
        # See note in ResidualBlock for the reason behind bias=True
        self.convnormrelu = Conv2dNormActivation(
@@ -169,8 +170,10 @@ class MotionEncoder(nn.Module):
    def __init__(self, *, in_channels_corr, corr_layers=(256, 192), flow_layers=(128, 64), out_channels=128):
        super().__init__()
-        assert len(flow_layers) == 2
-        assert len(corr_layers) in (1, 2)
+        if len(flow_layers) != 2:
+            raise ValueError(f"The expected number of flow_layers is 2, instead got {len(flow_layers)}")
+        if len(corr_layers) not in (1, 2):
+            raise ValueError(f"The number of corr_layers should be 1 or 2, instead got {len(corr_layers)}")
        self.convcorr1 = Conv2dNormActivation(in_channels_corr, corr_layers[0], norm_layer=None, kernel_size=1)
        if len(corr_layers) == 2:
@@ -234,8 +237,12 @@ class RecurrentBlock(nn.Module):
    def __init__(self, *, input_size, hidden_size, kernel_size=((1, 5), (5, 1)), padding=((0, 2), (2, 0))):
        super().__init__()
-        assert len(kernel_size) == len(padding)
-        assert len(kernel_size) in (1, 2)
+        if len(kernel_size) != len(padding):
+            raise ValueError(
+                f"kernel_size should have the same length as padding, instead got len(kernel_size) = {len(kernel_size)} and len(padding) = {len(padding)}"
+            )
+        if len(kernel_size) not in (1, 2):
+            raise ValueError(f"len(kernel_size) should be either 1 or 2, instead got {len(kernel_size)}")
        self.convgru1 = ConvGRU(
            input_size=input_size, hidden_size=hidden_size, kernel_size=kernel_size[0], padding=padding[0]
@@ -351,7 +358,10 @@ class CorrBlock(nn.Module):
        to build the correlation pyramid.
        """
-        torch._assert(fmap1.shape == fmap2.shape, "Input feature maps should have the same shapes")
+        if fmap1.shape != fmap2.shape:
+            raise ValueError(
+                f"Input feature maps should have the same shape, instead got {fmap1.shape} (fmap1.shape) != {fmap2.shape} (fmap2.shape)"
+            )
        corr_volume = self._compute_corr_volume(fmap1, fmap2)
        batch_size, h, w, num_channels, _, _ = corr_volume.shape  # _, _ = h, w
@@ -384,10 +394,10 @@ class CorrBlock(nn.Module):
        corr_features = torch.cat(indexed_pyramid, dim=-1).permute(0, 3, 1, 2).contiguous()
        expected_output_shape = (batch_size, self.out_channels, h, w)
-        torch._assert(
-            corr_features.shape == expected_output_shape,
-            f"Output shape of index pyramid is incorrect. Should be {expected_output_shape}, got {corr_features.shape}",
-        )
+        if corr_features.shape != expected_output_shape:
+            raise ValueError(
+                f"Output shape of index pyramid is incorrect. Should be {expected_output_shape}, got {corr_features.shape}"
+            )
        return corr_features
@@ -454,28 +464,31 @@ class RAFT(nn.Module):
    def forward(self, image1, image2, num_flow_updates: int = 12):
        batch_size, _, h, w = image1.shape
-        torch._assert((h, w) == image2.shape[-2:], "input images should have the same shape")
-        torch._assert((h % 8 == 0) and (w % 8 == 0), "input image H and W should be divisible by 8")
+        if (h, w) != image2.shape[-2:]:
+            raise ValueError(f"input images should have the same shape, instead got ({h}, {w}) != {image2.shape[-2:]}")
+        if not ((h % 8 == 0) and (w % 8 == 0)):
+            raise ValueError(f"input image H and W should be divisible by 8, instead got {h} (h) and {w} (w)")
        fmaps = self.feature_encoder(torch.cat([image1, image2], dim=0))
        fmap1, fmap2 = torch.chunk(fmaps, chunks=2, dim=0)
-        torch._assert(fmap1.shape[-2:] == (h // 8, w // 8), "The feature encoder should downsample H and W by 8")
+        if fmap1.shape[-2:] != (h // 8, w // 8):
+            raise ValueError("The feature encoder should downsample H and W by 8")
        self.corr_block.build_pyramid(fmap1, fmap2)
        context_out = self.context_encoder(image1)
-        torch._assert(context_out.shape[-2:] == (h // 8, w // 8), "The context encoder should downsample H and W by 8")
+        if context_out.shape[-2:] != (h // 8, w // 8):
+            raise ValueError("The context encoder should downsample H and W by 8")
        # As in the original paper, the actual output of the context encoder is split in 2 parts:
        # - one part is used to initialize the hidden state of the recurent units of the update block
        # - the rest is the "actual" context.
        hidden_state_size = self.update_block.hidden_state_size
        out_channels_context = context_out.shape[1] - hidden_state_size
-        torch._assert(
-            out_channels_context > 0,
-            f"The context encoder outputs {context_out.shape[1]} channels, but it should have at strictly more than"
-            f"hidden_state={hidden_state_size} channels",
-        )
+        if out_channels_context <= 0:
+            raise ValueError(
+                f"The context encoder outputs {context_out.shape[1]} channels, but it should have strictly more than hidden_state={hidden_state_size} channels"
+            )
        hidden_state, context = torch.split(context_out, [hidden_state_size, out_channels_context], dim=1)
        hidden_state = torch.tanh(hidden_state)
        context = F.relu(context)
...
@@ -42,7 +42,10 @@ class InvertedResidual(nn.Module):
        self.stride = stride
        branch_features = oup // 2
-        assert (self.stride != 1) or (inp == branch_features << 1)
+        if (self.stride == 1) and (inp != branch_features << 1):
+            raise ValueError(
+                f"Invalid combination of stride {stride}, inp {inp} and oup {oup} values. If stride == 1 then inp should be equal to oup // 2 << 1."
+            )
        if self.stride > 1:
            self.branch1 = nn.Sequential(
...
@@ -434,7 +434,10 @@ def interpolate_embeddings(
        # (1, seq_length, hidden_dim) -> (1, hidden_dim, seq_length)
        pos_embedding_img = pos_embedding_img.permute(0, 2, 1)
        seq_length_1d = int(math.sqrt(seq_length))
-        torch._assert(seq_length_1d * seq_length_1d == seq_length, "seq_length is not a perfect square!")
+        if seq_length_1d * seq_length_1d != seq_length:
+            raise ValueError(
+                f"seq_length is not a perfect square! Instead got seq_length_1d * seq_length_1d = {seq_length_1d * seq_length_1d} and seq_length = {seq_length}"
+            )
        # (1, hidden_dim, seq_length) -> (1, hidden_dim, seq_l_1d, seq_l_1d)
        pos_embedding_img = pos_embedding_img.reshape(1, hidden_dim, seq_length_1d, seq_length_1d)
...
@@ -28,13 +28,13 @@ def convert_boxes_to_roi_format(boxes: List[Tensor]) -> Tensor:
 def check_roi_boxes_shape(boxes: Union[Tensor, List[Tensor]]):
     if isinstance(boxes, (list, tuple)):
         for _tensor in boxes:
-            assert (
-                _tensor.size(1) == 4
-            ), "The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]]"
+            if _tensor.size(1) != 4:
+                raise ValueError("The shape of the tensor in the boxes list is not correct as List[Tensor[L, 4]].")
     elif isinstance(boxes, torch.Tensor):
-        assert boxes.size(1) == 5, "The boxes tensor shape is not correct as Tensor[K, 5]"
+        if boxes.size(1) != 5:
+            raise ValueError("The boxes tensor shape is not correct as Tensor[K, 5].")
     else:
-        assert False, "boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]]"
+        raise TypeError(f"boxes is expected to be a Tensor[L, 5] or a List[Tensor[K, 4]], instead got {type(boxes)}")
     return
...
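As a quick illustration of what the ops change above means for callers, the snippet below exercises torchvision.ops.roi_align, which routes its box validation through check_roi_boxes_shape; it is not part of this diff, just a hypothetical usage sketch:

import torch
from torchvision.ops import roi_align

feats = torch.rand(1, 8, 16, 16)

# Correct: a list of per-image [L, 4] boxes, or a single [K, 5] tensor
# whose first column is the batch index.
pooled = roi_align(feats, [torch.tensor([[0.0, 0.0, 8.0, 8.0]])], output_size=(2, 2))

try:
    # A plain [K, 4] tensor is rejected; after this commit the failure is a
    # ValueError with a readable message rather than a bare AssertionError.
    roi_align(feats, torch.rand(3, 4), output_size=(2, 2))
except ValueError as err:
    print(err)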