Commit 1f48a6b3 authored by Jiezhong Qiu

fix layernorm and remove unnecessary bias

parent fa023f32
@@ -824,16 +824,12 @@ class CustomizedMoEPositionwiseFF(FMoETransformerMLP):
         def activation(x):
             return self.dropout(F.relu(x))
 
         super().__init__(num_expert=moe_num_expert, d_model=d_model, d_hidden=d_inner, top_k=moe_top_k,
-                         pre_lnorm=pre_lnorm, activation=activation)
+                         do_lnorm=True, pre_lnorm=pre_lnorm, activation=activation)
         self.dropout = nn.Dropout(dropout)
-        self.bias = nn.Parameter(
-            torch.zeros(d_model, dtype=torch.float32)
-        )
 
     def forward(self, x):
         x = super().forward(x)
-        return x + self.bias
+        return x
 
 
 class DecoderLayer(nn.Module):
     def __init__(self, n_head, d_model, d_head, d_inner, dropout, **kwargs):
...
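For reference, a minimal sketch of how the class reads after this commit. It assumes the forked FMoETransformerMLP accepts the do_lnorm/pre_lnorm keyword arguments used in the hunk above, that the constructor signature (d_model, d_inner, dropout, pre_lnorm, moe_num_expert, moe_top_k) and its defaults match the rest of the file, and that the standard fastmoe import path applies; none of these are confirmed by the diff itself.

import torch.nn as nn
import torch.nn.functional as F
from fmoe.transformer import FMoETransformerMLP  # assumed import path; the fork may differ


class CustomizedMoEPositionwiseFF(FMoETransformerMLP):
    # Sketch of the post-commit class; signature and defaults are assumptions.
    def __init__(self, d_model, d_inner, dropout, pre_lnorm=False,
                 moe_num_expert=64, moe_top_k=2):
        # self.dropout is assigned after super().__init__, but activation is
        # only invoked later during forward, so the reference is safe.
        def activation(x):
            return self.dropout(F.relu(x))

        # do_lnorm=True presumably moves LayerNorm inside FMoETransformerMLP,
        # so the extra per-layer bias previously added in forward is removed.
        super().__init__(num_expert=moe_num_expert, d_model=d_model,
                         d_hidden=d_inner, top_k=moe_top_k,
                         do_lnorm=True, pre_lnorm=pre_lnorm,
                         activation=activation)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # No trailing bias term anymore; the parent handles normalization.
        return super().forward(x)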