"...python/git@developer.sourcefind.cn:change/sglang.git" did not exist on "737d73ed5bcb5cd161c142551c7e43257e8aa130"
Unverified commit 6d64cb3a, authored by Nicolas Hug, committed by GitHub

Fix default momentum value of BatchNorm2d in MaxViT from 0.99 to 0.01 (#8312)

parent 9563e3e3
@@ -577,7 +577,7 @@ class MaxVit(nn.Module):
         stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
         squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
         expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
-        norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.99)`).
+        norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
         activation_layer (Callable[..., nn.Module]): Activation function Default: nn.GELU.
         head_dim (int): Dimension of the attention heads.
         mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
@@ -623,7 +623,7 @@ class MaxVit(nn.Module):
         # https://github.com/google-research/maxvit/blob/da76cf0d8a6ec668cc31b399c4126186da7da944/maxvit/models/maxvit.py#L1029-L1030
         # for the exact parameters used in batchnorm
         if norm_layer is None:
-            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.99)
+            norm_layer = partial(nn.BatchNorm2d, eps=1e-3, momentum=0.01)
         # Make sure input size will be divisible by the partition size in all blocks
         # Undefined behavior if H or W are not divisible by p
@@ -788,7 +788,8 @@ class MaxVit_T_Weights(WeightsEnum):
             },
             "_ops": 5.558,
             "_file_size": 118.769,
-            "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.""",
+            "_docs": """These weights reproduce closely the results of the paper using a similar training recipe.
+            They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.""",
         },
     )
     DEFAULT = IMAGENET1K_V1
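Background for the fix, inferred from the linked google-research/maxvit reference code rather than stated in the diff itself: TensorFlow and PyTorch define BatchNorm `momentum` in opposite directions. In TensorFlow/Keras, `momentum` weights the old running statistic; in PyTorch, it weights the new batch statistic. The reference MaxViT uses TF `momentum=0.99`, whose PyTorch equivalent is `momentum=0.01`, so carrying the literal value 0.99 over to `nn.BatchNorm2d` made the running stats update fifty times too fast. A minimal sketch verifying PyTorch's update rule with the corrected value:

import torch
import torch.nn as nn

# PyTorch:    running_stat = (1 - momentum) * running_stat + momentum * batch_stat
# TensorFlow: moving_stat  = momentum * moving_stat + (1 - momentum) * batch_stat
# => TF momentum 0.99 (reference MaxViT) corresponds to PyTorch momentum 0.01 (this fix).

torch.manual_seed(0)
bn = nn.BatchNorm2d(num_features=3, eps=1e-3, momentum=0.01)

x = torch.randn(8, 3, 4, 4)
bn.train()
bn(x)  # one training step updates the running statistics

# Replicate the update PyTorch applies with momentum=0.01 (running_mean starts at zeros).
batch_mean = x.mean(dim=(0, 2, 3))
expected = (1 - 0.01) * torch.zeros(3) + 0.01 * batch_mean
print(torch.allclose(bn.running_mean, expected, atol=1e-6))  # True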