"...python/git@developer.sourcefind.cn:change/sglang.git" did not exist on "737d73ed5bcb5cd161c142551c7e43257e8aa130"
Unverified commit 6d64cb3a, authored by Nicolas Hug, committed by GitHub

Fix default momentum value of BatchNorm2d in MaxViT from 0.99 to 0.01 (#8312)

parent 9563e3e3
@@ -577,7 +577,7 @@ class MaxVit(nn.Module):
         stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
         squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
         expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
-        norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.99)`).
+        norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
         activation_layer (Callable[..., nn.Module]): Activation function Default: nn.GELU.
         head_dim (int): Dimension of the attention heads.
         mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
@@ -623,7 +623,7 @@ class MaxVit(nn.Module):
         # https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L1029-L1030
         # for the exact parameters used in batchnorm
         if norm_layer is None:
-            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.99)
+            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.01)
         # Make sure input size will be divisible by the partition size in all blocks
         # Undefined behavior if H or W are not divisible by p
@@ -788,7 +788,8 @@ class MaxVit_T_Weights(WeightsEnum):
             },
             "_ops": 5.558,
             "_file_size": 118.769,
-            "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""",
+            "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.
+            They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.""",
         },
     )
     DEFAULT = IMAGENET1K_V1
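Background for the fix, inferred from the linked google-research/maxvit reference code rather than stated in the diff itself: TensorFlow and PyTorch define BatchNorm `momentum` in opposite directions. In TensorFlow/Keras, `momentum` weights the old running statistic; in PyTorch, it weights the new batch statistic. The reference MaxViT uses TF `momentum=0.99`, whose PyTorch equivalent is `momentum=0.01`, so carrying the literal value 0.99 over to `nn.BatchNorm2d` made the running stats update fifty times too fast. A minimal sketch verifying PyTorch's update rule with the corrected value:

import torch
import torch.nn as nn

# PyTorch:    running_stat = (1 - momentum) * running_stat + momentum * batch_stat
# TensorFlow: moving_stat  = momentum * moving_stat + (1 - momentum) * batch_stat
# => TF momentum 0.99 (reference MaxViT) corresponds to PyTorch momentum 0.01 (this fix).

torch.manual_seed(0)
bn = nn.BatchNorm2d(num_features=3, eps=1e-3, momentum=0.01)

x = torch.randn(8, 3, 4, 4)
bn.train()
bn(x)  # one training step updates the running statistics

# Replicate the update PyTorch applies with momentum=0.01 (running_mean starts at zeros).
batch_mean = x.mean(dim=(0, 2, 3))
expected = (1 - 0.01) * torch.zeros(3) + 0.01 * batch_mean
print(torch.allclose(bn.running_mean, expected, atol=1e-6))  # True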