Unverified commit 2ec0e847, authored by Nicolas Hug, committed by GitHub

New schema for metrics in weights meta-data (#6047)

* Classif models

* Detection

* Segmentation

* quantization

* Video

* optical flow

* tests

* Fix docs

* Fix Video dataset

* Consistency for RAFT dataset names

* use ImageNet-1K

* Use COCO-val2017-VOC-labels for segmentation

* formatting
parent 2a35dde3
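
The change is mechanical but worth stating once: the flat "metrics" dict in each weights entry's meta is replaced by a "_metrics" dict keyed by the dataset the numbers were measured on. Below is a minimal sketch using plain dictionaries, with values copied from the ResNet50 ImageNet-1K V1 hunk further down; nothing here is torchvision API beyond the key names visible in the diff.

# Before this commit: metrics were a flat mapping, and the evaluation
# dataset was left implicit.
old_meta = {
    "metrics": {
        "acc@1": 76.130,
        "acc@5": 92.862,
    },
}

# After this commit: "_metrics" maps a dataset name to its own metric dict,
# so one weights entry can report results on several datasets with
# task-specific metric names (e.g. "ImageNet-1K" with acc@1/acc@5,
# "COCO-val2017-VOC-labels" with miou/pixel_acc, "Kinetics-400" with
# acc@1/acc@5).
new_meta = {
    "_metrics": {
        "ImageNet-1K": {
            "acc@1": 76.130,
            "acc@5": 92.862,
        },
    },
}

# Reading the new schema: iterate datasets first, then metrics.
for dataset, metrics in new_meta["_metrics"].items():
    for name, value in metrics.items():
        print(f"{dataset} {name}: {value}")
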
......@@ -183,9 +183,11 @@ class Inception_V3_QuantizedWeights(WeightsEnum):
"backend": "fbgemm",
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#post-training-quantized-models",
"unquantized": Inception_V3_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 77.176,
"acc@5": 93.354,
"_metrics": {
"ImageNet-1K": {
"acc@1": 77.176,
"acc@5": 93.354,
}
},
"_docs": """
These weights were produced by doing Post Training Quantization (eager mode) on top of the unquantized
......
......@@ -75,9 +75,11 @@ class MobileNet_V2_QuantizedWeights(WeightsEnum):
"backend": "qnnpack",
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv2",
"unquantized": MobileNet_V2_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 71.658,
"acc@5": 90.150,
"_metrics": {
"ImageNet-1K": {
"acc@1": 71.658,
"acc@5": 90.150,
}
},
"_docs": """
These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
......
......@@ -169,9 +169,11 @@ class MobileNet_V3_Large_QuantizedWeights(WeightsEnum):
"backend": "qnnpack",
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#qat-mobilenetv3",
"unquantized": MobileNet_V3_Large_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 73.004,
"acc@5": 90.858,
"_metrics": {
"ImageNet-1K": {
"acc@1": 73.004,
"acc@5": 90.858,
}
},
"_docs": """
These weights were produced by doing Quantization Aware Training (eager mode) on top of the unquantized
......
......@@ -169,9 +169,11 @@ class ResNet18_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 11689512,
"unquantized": ResNet18_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 69.494,
"acc@5": 88.882,
"_metrics": {
"ImageNet-1K": {
"acc@1": 69.494,
"acc@5": 88.882,
}
},
},
)
......@@ -186,9 +188,11 @@ class ResNet50_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 25557032,
"unquantized": ResNet50_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 75.920,
"acc@5": 92.814,
"_metrics": {
"ImageNet-1K": {
"acc@1": 75.920,
"acc@5": 92.814,
}
},
},
)
......@@ -199,9 +203,11 @@ class ResNet50_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 25557032,
"unquantized": ResNet50_Weights.IMAGENET1K_V2,
"metrics": {
"acc@1": 80.282,
"acc@5": 94.976,
"_metrics": {
"ImageNet-1K": {
"acc@1": 80.282,
"acc@5": 94.976,
}
},
},
)
......@@ -216,9 +222,11 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 88791336,
"unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 78.986,
"acc@5": 94.480,
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.986,
"acc@5": 94.480,
}
},
},
)
......@@ -229,9 +237,11 @@ class ResNeXt101_32X8D_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 88791336,
"unquantized": ResNeXt101_32X8D_Weights.IMAGENET1K_V2,
"metrics": {
"acc@1": 82.574,
"acc@5": 96.132,
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.574,
"acc@5": 96.132,
}
},
},
)
......@@ -247,9 +257,11 @@ class ResNeXt101_64X4D_QuantizedWeights(WeightsEnum):
"num_params": 83455272,
"recipe": "https://github.com/pytorch/vision/pull/5935",
"unquantized": ResNeXt101_64X4D_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 82.898,
"acc@5": 96.326,
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.898,
"acc@5": 96.326,
}
},
},
)
......
......@@ -133,9 +133,11 @@ class ShuffleNet_V2_X0_5_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 1366792,
"unquantized": ShuffleNet_V2_X0_5_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 57.972,
"acc@5": 79.780,
"_metrics": {
"ImageNet-1K": {
"acc@1": 57.972,
"acc@5": 79.780,
}
},
},
)
......@@ -150,9 +152,11 @@ class ShuffleNet_V2_X1_0_QuantizedWeights(WeightsEnum):
**_COMMON_META,
"num_params": 2278604,
"unquantized": ShuffleNet_V2_X1_0_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 68.360,
"acc@5": 87.582,
"_metrics": {
"ImageNet-1K": {
"acc@1": 68.360,
"acc@5": 87.582,
}
},
},
)
......@@ -168,9 +172,11 @@ class ShuffleNet_V2_X1_5_QuantizedWeights(WeightsEnum):
"recipe": "https://github.com/pytorch/vision/pull/5906",
"num_params": 3503624,
"unquantized": ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 72.052,
"acc@5": 90.700,
"_metrics": {
"ImageNet-1K": {
"acc@1": 72.052,
"acc@5": 90.700,
}
},
},
)
......@@ -186,9 +192,11 @@ class ShuffleNet_V2_X2_0_QuantizedWeights(WeightsEnum):
"recipe": "https://github.com/pytorch/vision/pull/5906",
"num_params": 7393996,
"unquantized": ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1,
"metrics": {
"acc@1": 75.354,
"acc@5": 92.488,
"_metrics": {
"ImageNet-1K": {
"acc@1": 75.354,
"acc@5": 92.488,
}
},
},
)
......
(The diff for one file is collapsed and not shown here.)
......@@ -317,9 +317,11 @@ class ResNet18_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 11689512,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"metrics": {
"acc@1": 69.758,
"acc@5": 89.078,
"_metrics": {
"ImageNet-1K": {
"acc@1": 69.758,
"acc@5": 89.078,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -335,9 +337,11 @@ class ResNet34_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 21797672,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"metrics": {
"acc@1": 73.314,
"acc@5": 91.420,
"_metrics": {
"ImageNet-1K": {
"acc@1": 73.314,
"acc@5": 91.420,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -353,9 +357,11 @@ class ResNet50_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 25557032,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"metrics": {
"acc@1": 76.130,
"acc@5": 92.862,
"_metrics": {
"ImageNet-1K": {
"acc@1": 76.130,
"acc@5": 92.862,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -367,9 +373,11 @@ class ResNet50_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 25557032,
"recipe": "https://github.com/pytorch/vision/issues/3995#issuecomment-1013906621",
"metrics": {
"acc@1": 80.858,
"acc@5": 95.434,
"_metrics": {
"ImageNet-1K": {
"acc@1": 80.858,
"acc@5": 95.434,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......@@ -388,9 +396,11 @@ class ResNet101_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 44549160,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"metrics": {
"acc@1": 77.374,
"acc@5": 93.546,
"_metrics": {
"ImageNet-1K": {
"acc@1": 77.374,
"acc@5": 93.546,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -402,9 +412,11 @@ class ResNet101_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 44549160,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"metrics": {
"acc@1": 81.886,
"acc@5": 95.780,
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.886,
"acc@5": 95.780,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......@@ -423,9 +435,11 @@ class ResNet152_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 60192808,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnet",
"metrics": {
"acc@1": 78.312,
"acc@5": 94.046,
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.312,
"acc@5": 94.046,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -437,9 +451,11 @@ class ResNet152_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 60192808,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"metrics": {
"acc@1": 82.284,
"acc@5": 96.002,
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.284,
"acc@5": 96.002,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......@@ -458,9 +474,11 @@ class ResNeXt50_32X4D_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 25028904,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext",
"metrics": {
"acc@1": 77.618,
"acc@5": 93.698,
"_metrics": {
"ImageNet-1K": {
"acc@1": 77.618,
"acc@5": 93.698,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -472,9 +490,11 @@ class ResNeXt50_32X4D_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 25028904,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"metrics": {
"acc@1": 81.198,
"acc@5": 95.340,
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.198,
"acc@5": 95.340,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......@@ -493,9 +513,11 @@ class ResNeXt101_32X8D_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 88791336,
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#resnext",
"metrics": {
"acc@1": 79.312,
"acc@5": 94.526,
"_metrics": {
"ImageNet-1K": {
"acc@1": 79.312,
"acc@5": 94.526,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -507,9 +529,11 @@ class ResNeXt101_32X8D_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 88791336,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres",
"metrics": {
"acc@1": 82.834,
"acc@5": 96.228,
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.834,
"acc@5": 96.228,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......@@ -528,9 +552,11 @@ class ResNeXt101_64X4D_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 83455272,
"recipe": "https://github.com/pytorch/vision/pull/5935",
"metrics": {
"acc@1": 83.246,
"acc@5": 96.454,
"_metrics": {
"ImageNet-1K": {
"acc@1": 83.246,
"acc@5": 96.454,
}
},
"_docs": """
These weights were trained from scratch by using TorchVision's `new training recipe
......@@ -549,9 +575,11 @@ class Wide_ResNet50_2_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 68883240,
"recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439",
"metrics": {
"acc@1": 78.468,
"acc@5": 94.086,
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.468,
"acc@5": 94.086,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -563,9 +591,11 @@ class Wide_ResNet50_2_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 68883240,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe-with-fixres",
"metrics": {
"acc@1": 81.602,
"acc@5": 95.758,
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.602,
"acc@5": 95.758,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......@@ -584,9 +614,11 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 126886696,
"recipe": "https://github.com/pytorch/vision/pull/912#issue-445437439",
"metrics": {
"acc@1": 78.848,
"acc@5": 94.284,
"_metrics": {
"ImageNet-1K": {
"acc@1": 78.848,
"acc@5": 94.284,
}
},
"_docs": """These weights reproduce closely the results of the paper using a simple training recipe.""",
},
......@@ -598,9 +630,11 @@ class Wide_ResNet101_2_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 126886696,
"recipe": "https://github.com/pytorch/vision/issues/3995#new-recipe",
"metrics": {
"acc@1": 82.510,
"acc@5": 96.020,
"_metrics": {
"ImageNet-1K": {
"acc@1": 82.510,
"acc@5": 96.020,
}
},
"_docs": """
These weights improve upon the results of the original paper by using TorchVision's `new training recipe
......
......@@ -146,9 +146,11 @@ class DeepLabV3_ResNet50_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 42004074,
"recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet50",
"metrics": {
"miou": 66.4,
"pixel_acc": 92.4,
"_metrics": {
"COCO-val2017-VOC-labels": {
"miou": 66.4,
"pixel_acc": 92.4,
}
},
},
)
......@@ -163,9 +165,11 @@ class DeepLabV3_ResNet101_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 60996202,
"recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet101",
"metrics": {
"miou": 67.4,
"pixel_acc": 92.4,
"_metrics": {
"COCO-val2017-VOC-labels": {
"miou": 67.4,
"pixel_acc": 92.4,
}
},
},
)
......@@ -180,9 +184,11 @@ class DeepLabV3_MobileNet_V3_Large_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 11029328,
"recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_mobilenet_v3_large",
"metrics": {
"miou": 60.3,
"pixel_acc": 91.2,
"_metrics": {
"COCO-val2017-VOC-labels": {
"miou": 60.3,
"pixel_acc": 91.2,
}
},
},
)
......
......@@ -65,9 +65,11 @@ class FCN_ResNet50_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 35322218,
"recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#fcn_resnet50",
"metrics": {
"miou": 60.5,
"pixel_acc": 91.4,
"_metrics": {
"COCO-val2017-VOC-labels": {
"miou": 60.5,
"pixel_acc": 91.4,
}
},
},
)
......@@ -82,9 +84,11 @@ class FCN_ResNet101_Weights(WeightsEnum):
**_COMMON_META,
"num_params": 54314346,
"recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#deeplabv3_resnet101",
"metrics": {
"miou": 63.7,
"pixel_acc": 91.9,
"_metrics": {
"COCO-val2017-VOC-labels": {
"miou": 63.7,
"pixel_acc": 91.9,
}
},
},
)
......
......@@ -102,9 +102,11 @@ class LRASPP_MobileNet_V3_Large_Weights(WeightsEnum):
"categories": _VOC_CATEGORIES,
"min_size": (1, 1),
"recipe": "https://github.com/pytorch/vision/tree/main/references/segmentation#lraspp_mobilenet_v3_large",
"metrics": {
"miou": 57.9,
"pixel_acc": 91.2,
"_metrics": {
"COCO-val2017-VOC-labels": {
"miou": 57.9,
"pixel_acc": 91.2,
}
},
"_docs": """
These weights were trained on a subset of COCO, using only the 20 categories that are present in the
......
......@@ -198,9 +198,11 @@ class ShuffleNet_V2_X0_5_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 1366792,
"metrics": {
"acc@1": 60.552,
"acc@5": 81.746,
"_metrics": {
"ImageNet-1K": {
"acc@1": 60.552,
"acc@5": 81.746,
}
},
"_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""",
},
......@@ -216,9 +218,11 @@ class ShuffleNet_V2_X1_0_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 2278604,
"metrics": {
"acc@1": 69.362,
"acc@5": 88.316,
"_metrics": {
"ImageNet-1K": {
"acc@1": 69.362,
"acc@5": 88.316,
}
},
"_docs": """These weights were trained from scratch to reproduce closely the results of the paper.""",
},
......@@ -234,9 +238,11 @@ class ShuffleNet_V2_X1_5_Weights(WeightsEnum):
**_COMMON_META,
"recipe": "https://github.com/pytorch/vision/pull/5906",
"num_params": 3503624,
"metrics": {
"acc@1": 72.996,
"acc@5": 91.086,
"_metrics": {
"ImageNet-1K": {
"acc@1": 72.996,
"acc@5": 91.086,
}
},
"_docs": """
These weights were trained from scratch by using TorchVision's `new training recipe
......@@ -255,9 +261,11 @@ class ShuffleNet_V2_X2_0_Weights(WeightsEnum):
**_COMMON_META,
"recipe": "https://github.com/pytorch/vision/pull/5906",
"num_params": 7393996,
"metrics": {
"acc@1": 76.230,
"acc@5": 93.006,
"_metrics": {
"ImageNet-1K": {
"acc@1": 76.230,
"acc@5": 93.006,
}
},
"_docs": """
These weights were trained from scratch by using TorchVision's `new training recipe
......
......@@ -129,9 +129,11 @@ class SqueezeNet1_0_Weights(WeightsEnum):
**_COMMON_META,
"min_size": (21, 21),
"num_params": 1248424,
"metrics": {
"acc@1": 58.092,
"acc@5": 80.420,
"_metrics": {
"ImageNet-1K": {
"acc@1": 58.092,
"acc@5": 80.420,
}
},
},
)
......@@ -146,9 +148,11 @@ class SqueezeNet1_1_Weights(WeightsEnum):
**_COMMON_META,
"min_size": (17, 17),
"num_params": 1235496,
"metrics": {
"acc@1": 58.178,
"acc@5": 80.624,
"_metrics": {
"ImageNet-1K": {
"acc@1": 58.178,
"acc@5": 80.624,
}
},
},
)
......
......@@ -417,9 +417,11 @@ class Swin_T_Weights(WeightsEnum):
"num_params": 28288354,
"min_size": (224, 224),
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#swintransformer",
"metrics": {
"acc@1": 81.358,
"acc@5": 95.526,
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.358,
"acc@5": 95.526,
}
},
"_docs": """These weights reproduce closely the results of the paper using its training recipe.""",
},
......
......@@ -121,9 +121,11 @@ class VGG11_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 132863336,
"metrics": {
"acc@1": 69.020,
"acc@5": 88.628,
"_metrics": {
"ImageNet-1K": {
"acc@1": 69.020,
"acc@5": 88.628,
}
},
},
)
......@@ -137,9 +139,11 @@ class VGG11_BN_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 132868840,
"metrics": {
"acc@1": 70.370,
"acc@5": 89.810,
"_metrics": {
"ImageNet-1K": {
"acc@1": 70.370,
"acc@5": 89.810,
}
},
},
)
......@@ -153,9 +157,11 @@ class VGG13_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 133047848,
"metrics": {
"acc@1": 69.928,
"acc@5": 89.246,
"_metrics": {
"ImageNet-1K": {
"acc@1": 69.928,
"acc@5": 89.246,
}
},
},
)
......@@ -169,9 +175,11 @@ class VGG13_BN_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 133053736,
"metrics": {
"acc@1": 71.586,
"acc@5": 90.374,
"_metrics": {
"ImageNet-1K": {
"acc@1": 71.586,
"acc@5": 90.374,
}
},
},
)
......@@ -185,9 +193,11 @@ class VGG16_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 138357544,
"metrics": {
"acc@1": 71.592,
"acc@5": 90.382,
"_metrics": {
"ImageNet-1K": {
"acc@1": 71.592,
"acc@5": 90.382,
}
},
},
)
......@@ -205,9 +215,11 @@ class VGG16_Weights(WeightsEnum):
"num_params": 138357544,
"categories": None,
"recipe": "https://github.com/amdegroot/ssd.pytorch#training-ssd",
"metrics": {
"acc@1": float("nan"),
"acc@5": float("nan"),
"_metrics": {
"ImageNet-1K": {
"acc@1": float("nan"),
"acc@5": float("nan"),
}
},
"_docs": """
These weights can't be used for classification because they are missing values in the `classifier`
......@@ -226,9 +238,11 @@ class VGG16_BN_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 138365992,
"metrics": {
"acc@1": 73.360,
"acc@5": 91.516,
"_metrics": {
"ImageNet-1K": {
"acc@1": 73.360,
"acc@5": 91.516,
}
},
},
)
......@@ -242,9 +256,11 @@ class VGG19_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 143667240,
"metrics": {
"acc@1": 72.376,
"acc@5": 90.876,
"_metrics": {
"ImageNet-1K": {
"acc@1": 72.376,
"acc@5": 90.876,
}
},
},
)
......@@ -258,9 +274,11 @@ class VGG19_BN_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 143678248,
"metrics": {
"acc@1": 74.218,
"acc@5": 91.842,
"_metrics": {
"ImageNet-1K": {
"acc@1": 74.218,
"acc@5": 91.842,
}
},
},
)
......
......@@ -323,9 +323,11 @@ class R3D_18_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 33371472,
"metrics": {
"acc@1": 52.75,
"acc@5": 75.45,
"_metrics": {
"Kinetics-400": {
"acc@1": 52.75,
"acc@5": 75.45,
}
},
},
)
......@@ -339,9 +341,11 @@ class MC3_18_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 11695440,
"metrics": {
"acc@1": 53.90,
"acc@5": 76.29,
"_metrics": {
"Kinetics-400": {
"acc@1": 53.90,
"acc@5": 76.29,
}
},
},
)
......@@ -355,9 +359,11 @@ class R2Plus1D_18_Weights(WeightsEnum):
meta={
**_COMMON_META,
"num_params": 31505325,
"metrics": {
"acc@1": 57.50,
"acc@5": 78.81,
"_metrics": {
"Kinetics-400": {
"acc@1": 57.50,
"acc@5": 78.81,
}
},
},
)
......
......@@ -328,9 +328,11 @@ class ViT_B_16_Weights(WeightsEnum):
"num_params": 86567656,
"min_size": (224, 224),
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16",
"metrics": {
"acc@1": 81.072,
"acc@5": 95.318,
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.072,
"acc@5": 95.318,
}
},
"_docs": """
These weights were trained from scratch by using a modified version of `DeIT
......@@ -350,9 +352,11 @@ class ViT_B_16_Weights(WeightsEnum):
**_COMMON_SWAG_META,
"num_params": 86859496,
"min_size": (384, 384),
"metrics": {
"acc@1": 85.304,
"acc@5": 97.650,
"_metrics": {
"ImageNet-1K": {
"acc@1": 85.304,
"acc@5": 97.650,
}
},
"_docs": """
These weights are learnt via transfer learning by end-to-end fine-tuning the original
......@@ -373,9 +377,11 @@ class ViT_B_16_Weights(WeightsEnum):
"recipe": "https://github.com/pytorch/vision/pull/5793",
"num_params": 86567656,
"min_size": (224, 224),
"metrics": {
"acc@1": 81.886,
"acc@5": 96.180,
"_metrics": {
"ImageNet-1K": {
"acc@1": 81.886,
"acc@5": 96.180,
}
},
"_docs": """
These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
......@@ -395,9 +401,11 @@ class ViT_B_32_Weights(WeightsEnum):
"num_params": 88224232,
"min_size": (224, 224),
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32",
"metrics": {
"acc@1": 75.912,
"acc@5": 92.466,
"_metrics": {
"ImageNet-1K": {
"acc@1": 75.912,
"acc@5": 92.466,
}
},
"_docs": """
These weights were trained from scratch by using a modified version of `DeIT
......@@ -417,9 +425,11 @@ class ViT_L_16_Weights(WeightsEnum):
"num_params": 304326632,
"min_size": (224, 224),
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16",
"metrics": {
"acc@1": 79.662,
"acc@5": 94.638,
"_metrics": {
"ImageNet-1K": {
"acc@1": 79.662,
"acc@5": 94.638,
}
},
"_docs": """
These weights were trained from scratch by using a modified version of TorchVision's
......@@ -440,9 +450,11 @@ class ViT_L_16_Weights(WeightsEnum):
**_COMMON_SWAG_META,
"num_params": 305174504,
"min_size": (512, 512),
"metrics": {
"acc@1": 88.064,
"acc@5": 98.512,
"_metrics": {
"ImageNet-1K": {
"acc@1": 88.064,
"acc@5": 98.512,
}
},
"_docs": """
These weights are learnt via transfer learning by end-to-end fine-tuning the original
......@@ -463,9 +475,11 @@ class ViT_L_16_Weights(WeightsEnum):
"recipe": "https://github.com/pytorch/vision/pull/5793",
"num_params": 304326632,
"min_size": (224, 224),
"metrics": {
"acc@1": 85.146,
"acc@5": 97.422,
"_metrics": {
"ImageNet-1K": {
"acc@1": 85.146,
"acc@5": 97.422,
}
},
"_docs": """
These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
......@@ -485,9 +499,11 @@ class ViT_L_32_Weights(WeightsEnum):
"num_params": 306535400,
"min_size": (224, 224),
"recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32",
"metrics": {
"acc@1": 76.972,
"acc@5": 93.07,
"_metrics": {
"ImageNet-1K": {
"acc@1": 76.972,
"acc@5": 93.07,
}
},
"_docs": """
These weights were trained from scratch by using a modified version of `DeIT
......@@ -511,9 +527,11 @@ class ViT_H_14_Weights(WeightsEnum):
**_COMMON_SWAG_META,
"num_params": 633470440,
"min_size": (518, 518),
"metrics": {
"acc@1": 88.552,
"acc@5": 98.694,
"_metrics": {
"ImageNet-1K": {
"acc@1": 88.552,
"acc@5": 98.694,
}
},
"_docs": """
These weights are learnt via transfer learning by end-to-end fine-tuning the original
......@@ -534,9 +552,11 @@ class ViT_H_14_Weights(WeightsEnum):
"recipe": "https://github.com/pytorch/vision/pull/5793",
"num_params": 632045800,
"min_size": (224, 224),
"metrics": {
"acc@1": 85.708,
"acc@5": 97.730,
"_metrics": {
"ImageNet-1K": {
"acc@1": 85.708,
"acc@5": 97.730,
}
},
"_docs": """
These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
......
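
As a usage check, the nested structure can be read straight off an existing weights enum. This is a short sketch assuming a torchvision build that includes this commit; the ResNet50_Weights enum and its meta attribute already exist, only the "_metrics" key is new.

from torchvision.models import ResNet50_Weights

weights = ResNet50_Weights.IMAGENET1K_V2
for dataset, metrics in weights.meta["_metrics"].items():
    for name, value in metrics.items():
        # e.g. "ImageNet-1K acc@1 = 80.858", per the ResNet50 V2 hunk above
        print(f"{dataset} {name} = {value}")
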