Commit 66f7166d authored by Rick Ho

let fmoe.megatron use correct mpu

parent b0990e4b
@@ -99,6 +99,7 @@ def fmoefy(model, num_experts=None, distributed_experts=True,
     tensor_model_parall_comm x data_parallel_comm, which is not created.
     '''
     from megatron import get_args
+    from megatron import mpu
     args = get_args()
     if num_experts is not None:
         args.num_experts = num_experts
@@ -121,7 +122,7 @@ def fmoefy(model, num_experts=None, distributed_experts=True,
     args.distributed_experts = distributed_experts
     for l in model.language_model.transformer.layers:
-        l.mlp = MegatronMLP(args, None)
+        l.mlp = MegatronMLP(args, mpu.get_model_parallel_group())
     return model
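
For context, a minimal sketch of where this change takes effect: fmoefy patches every transformer layer's MLP with an FMoE MegatronMLP, which after this commit is handed Megatron's model-parallel group instead of None. The snippet below is illustrative only; build_megatron_gpt_model is a hypothetical placeholder for whatever builds the plain Megatron model in your training script, and the num_experts value is arbitrary.

# Sketch (not part of this commit): calling fmoefy from a Megatron-LM
# model provider. The fmoefy signature matches the diff above.
from fmoe.megatron import fmoefy

def model_provider():
    model = build_megatron_gpt_model()  # hypothetical builder for the vanilla model
    # Replace each layer's MLP with an FMoE MegatronMLP. After this commit,
    # fmoefy passes mpu.get_model_parallel_group() into MegatronMLP, so the
    # expert communication uses the correct communicator instead of None.
    model = fmoefy(model, num_experts=4, distributed_experts=True)
    return model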