Commit 96ed8976 authored by Jiezhong Qiu
Browse files

Put dropout after ReLU

The order of dropout (dp) and ReLU does not affect the output, so this is a readability change only.
parent 03b2a725
...@@ -380,8 +380,8 @@ from fmoe import FMoETransformerMLP ...@@ -380,8 +380,8 @@ from fmoe import FMoETransformerMLP
class CustomizedMoEPositionwiseFF(FMoETransformerMLP): class CustomizedMoEPositionwiseFF(FMoETransformerMLP):
def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, moe_num_expert=64, moe_top_k=2): def __init__(self, d_model, d_inner, dropout, pre_lnorm=False, moe_num_expert=64, moe_top_k=2):
activation = nn.Sequential( activation = nn.Sequential(
nn.Dropout(dropout),
nn.ReLU() nn.ReLU(),
nn.Dropout(dropout),
) )
super().__init__(num_expert=moe_num_expert, d_model=d_model, d_hidden=d_inner, top_k=moe_top_k, super().__init__(num_expert=moe_num_expert, d_model=d_model, d_hidden=d_inner, top_k=moe_top_k,
do_lnorm=True, pre_lnorm=pre_lnorm, activation=activation, dropout=dropout) do_lnorm=True, pre_lnorm=pre_lnorm, activation=activation, dropout=dropout)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment