"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "c85510f958e6955d88ea1bafb4f320074bfbd0c1"
Unverified commit 1c57242d, authored by amyeroberts, committed by GitHub

Fix bug - layer names and activation from previous refactor (#17524)

* Fix activation and layers in MLP head

* Remove unused import
parent babeff55
@@ -2116,26 +2116,10 @@ class MaskFormerSinePositionEmbedding(nn.Module):
         return pos
 
 
-class IdentityBlock(nn.Module):
-    def __init__(self):
-        super().__init__()
-        # Create as an iterable here so that the identity layer isn't registered
-        # with the name of the instance variable its assigned to
-        self.layers = [nn.Identity()]
-        # Maintain submodule indexing as if part of a Sequential block
-        self.add_module("0", self.layers[0])
-
-    def forward(self, input: Tensor) -> Tensor:
-        hidden_state = input
-        for layer in self.layers:
-            hidden_state = layer(hidden_state)
-        return hidden_state
-
-
-class NonLinearBlock(nn.Module):
-    def __init__(self, in_dim: int, out_dim: int) -> None:
-        super().__init__()
-        self.layers = [nn.Linear(in_dim, out_dim), nn.ReLU(inplace=True)]
+class PredictionBlock(nn.Module):
+    def __init__(self, in_dim: int, out_dim: int, activation: nn.Module) -> None:
+        super().__init__()
+        self.layers = [nn.Linear(in_dim, out_dim), activation]
         # Maintain submodule indexing as if part of a Sequential block
         for i, layer in enumerate(self.layers):
             self.add_module(str(i), layer)
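The add_module(str(i), layer) loop registers each submodule under its index ("0", "1") rather than under the name of the Python attribute it is assigned to, so parameter names stay compatible with the nn.Sequential-based layout the class previously inherited from. Below is a minimal, self-contained sketch of that naming pattern; TinyPredictionBlock is a made-up stand-in for illustration and is not part of the commit:

from torch import Tensor, nn


class TinyPredictionBlock(nn.Module):
    # Illustrative stand-in for the PredictionBlock pattern shown above.
    def __init__(self, in_dim: int, out_dim: int, activation: nn.Module) -> None:
        super().__init__()
        # A plain Python list is not auto-registered, so no "layers." prefix appears;
        # each entry is registered explicitly under its index, as nn.Sequential would name it.
        self.layers = [nn.Linear(in_dim, out_dim), activation]
        for i, layer in enumerate(self.layers):
            self.add_module(str(i), layer)

    def forward(self, input: Tensor) -> Tensor:
        hidden_state = input
        for layer in self.layers:
            hidden_state = layer(hidden_state)
        return hidden_state


block = TinyPredictionBlock(4, 2, nn.ReLU())
sequential = nn.Sequential(nn.Linear(4, 2), nn.ReLU())

# Both expose the same parameter names, so checkpoints saved with the old
# nn.Sequential-style layout still load.
print(sorted(block.state_dict()))       # ['0.bias', '0.weight']
print(sorted(sequential.state_dict()))  # ['0.bias', '0.weight']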
@@ -2168,7 +2152,8 @@ class MaskformerMLPPredictionHead(nn.Module):
         self.layers = []
         for i, (in_dim, out_dim) in enumerate(zip(in_dims, out_dims)):
-            layer = NonLinearBlock(in_dim, out_dim) if i < num_layers - 1 else IdentityBlock()
+            activation = nn.ReLU() if i < num_layers - 1 else nn.Identity()
+            layer = PredictionBlock(in_dim, out_dim, activation=activation)
             self.layers.append(layer)
 
             # Provide backwards compatibility from when the class inherited from nn.Sequential
             # In nn.Sequential subclasses, the name given to the layer is its index in the sequence.
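This hunk is the activation fix named in the commit message: with the old selection, the final block was a bare IdentityBlock and so carried no Linear projection at all, whereas the new code gives every block a Linear and only switches the activation (ReLU between layers, Identity after the last projection). A short illustrative sketch of the new selection with made-up dimensions, not taken from the commit:

from torch import nn

# Hypothetical head dimensions: two hidden projections, then an output of size 4.
num_layers = 3
in_dims = [256, 128, 128]
out_dims = [128, 128, 4]

blocks = []
for i, (in_dim, out_dim) in enumerate(zip(in_dims, out_dims)):
    # ReLU between layers, Identity (no non-linearity) after the final projection,
    # mirroring the selection introduced above.
    activation = nn.ReLU() if i < num_layers - 1 else nn.Identity()
    blocks.append(nn.Sequential(nn.Linear(in_dim, out_dim), activation))

head = nn.Sequential(*blocks)
# Every block keeps its Linear layer; only the last block's activation is Identity.
print(head)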
@@ -181,19 +181,11 @@ class ResNetStage(nn.Module):
         layer = ResNetBottleNeckLayer if config.layer_type == "bottleneck" else ResNetBasicLayer
 
-        self.layers = [
+        self.layers = nn.Sequential(
             # downsampling is done in the first layer with stride of 2
             layer(in_channels, out_channels, stride=stride, activation=config.hidden_act),
             *[layer(out_channels, out_channels, activation=config.hidden_act) for _ in range(depth - 1)],
-        ]
-        # Provide backwards compatibility from when the class inherited from nn.Sequential
-        # In nn.Sequential subclasses, the name given to the layer is its index in the sequence.
-        # In nn.Module subclasses they derived from the instance attribute they are assigned to e.g.
-        # self.my_layer_name = Layer()
-        # We can't give instance attributes integer names i.e. self.0 is not permitted and so need to register
-        # the module explicitly
-        for i, layer in enumerate(self.layers):
-            self.add_module(str(i), layer)
+        )
 
     def forward(self, input: Tensor) -> Tensor:
         hidden_state = input
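With self.layers built as an nn.Sequential, every stacked layer is registered automatically under the attribute name plus its index ("layers.0", "layers.1", ...), so the manual add_module loop and its backwards-compatibility comments are no longer needed. A minimal sketch of that naming behaviour, using plain Conv2d stand-ins instead of the real ResNet layers and config, not part of the commit:

import torch
from torch import nn


class TinyStage(nn.Module):
    # Illustrative stand-in for the ResNetStage pattern above; the real class
    # builds ResNetBasicLayer / ResNetBottleNeckLayer blocks from the config.
    def __init__(self, in_channels: int, out_channels: int, depth: int = 2) -> None:
        super().__init__()
        self.layers = nn.Sequential(
            # downsampling happens in the first layer via stride 2
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1),
            *[nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1) for _ in range(depth - 1)],
        )

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        hidden_state = input
        for layer in self.layers:
            hidden_state = layer(hidden_state)
        return hidden_state


stage = TinyStage(3, 8)
# nn.Sequential names its children by index, so parameters come out as
# "layers.0.weight", "layers.0.bias", "layers.1.weight", "layers.1.bias", ...
print(sorted(stage.state_dict()))
print(stage(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 8, 16, 16])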