Unverified Commit 98e24e24 authored by zms1999, committed by GitHub

Merge pull request #100 from laekov/doc-fix

Fix document for megatron
parents 9ceebeb7 7bdf58c9
@@ -46,6 +46,9 @@ def generate_megatron_gate_hook(layer_idx, num_expert_global):
 def add_balance_log(model, writer, iteration):
+    r"""
+    Note that this function does not work with pipeline parallelism
+    """
     from megatron import is_last_rank
     while hasattr(model, 'module'):
...
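For context, the hunk above documents the `add_balance_log(model, writer, iteration)` helper. Below is a minimal sketch of where such a call could sit in a training loop; only the signature comes from the diff, while the import path, the `SummaryWriter`, and the loop structure are assumptions for illustration.

```python
# Hedged sketch: log expert load-balance statistics once per iteration.
# Only the call signature add_balance_log(model, writer, iteration) is
# taken from the diff above; the export path and the loop are assumed.
from torch.utils.tensorboard import SummaryWriter
from fmoe.megatron import add_balance_log  # assumed export path

writer = SummaryWriter(log_dir='runs/moe-balance')

def train_loop(model, optimizer, train_step, num_iterations):
    """Hypothetical loop showing where the balance log fits."""
    for iteration in range(num_iterations):
        train_step(model, optimizer)  # hypothetical per-step update
        # Per the added docstring, this helper does not work with
        # pipeline parallelism.
        add_balance_log(model, writer, iteration)
```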
@@ -162,9 +162,6 @@ def fmoefy(
     they are trained in data-parallel mode. This can be useful when testing on
     small models that do not require high training throughput or large parameter
     capacity.
-    Note that pipeline parallel is not supported yet. When distributed experts
-    are enabled, their communicator should be Megatron's
-    tensor_model_parall_comm x data_parallel_comm, which is not created.
     """
     from megatron import get_args
...
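The second hunk trims the caveat from `fmoefy`'s docstring, which describes replacing a Megatron model's FFN blocks with FastMoE expert layers trained in data-parallel mode. A hedged sketch of typical usage follows; the Megatron import paths, the `GPTModel` constructor arguments, and the `num_experts` keyword are version-dependent assumptions and should be checked against the FastMoE and Megatron-LM sources.

```python
# Hedged sketch: fmoefy swaps a Megatron transformer's FFNs for FastMoE
# expert layers. Megatron must already be initialized because fmoefy
# reads the global arguments via megatron.get_args(). Import paths and
# constructor arguments below are assumptions for illustration.
from megatron.initialize import initialize_megatron
from megatron.model import GPTModel
from fmoe.megatron import fmoefy  # assumed export path

initialize_megatron()  # parses Megatron's command-line arguments
model = GPTModel(num_tokentypes=0, parallel_output=True)

# Experts are trained in data-parallel mode, as the docstring above
# notes, which suits small models used for testing.
model = fmoefy(model, num_experts=4)
```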