Commit d439b417 authored by Vijay Korthikanti

update comment

parent 9da6e975
@@ -109,9 +109,8 @@ class MegatronModule(torch.nn.Module):
                 torch.distributed.all_reduce(self.word_embeddings_weight().data,
                                              group=mpu.get_embedding_group())
-            # All-reduce other embeddings as well as necessary. The last stage
-            # does not have these other embeddings, so just create placeholder
-            # tensors of the right shape with all zeros.
+            # Ensure that encoder(first stage) and decoder(split stage) position
+            # embeddings have the same initial parameter values
             # NOTE: We don't currently support T5 with the interleaved schedule.
             if mpu.is_rank_in_position_embedding_group() and \
                     args.pipeline_model_parallel_split_rank is not None:
...
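For context, the sketch below illustrates the synchronization pattern the new comment describes, not the repository's actual code: each rank holds its own copy of a position embedding table, the rank playing the decoder-side (split-stage) role zeros its copy, and a sum all-reduce over a shared process group leaves every copy equal to the encoder-side (first-stage) initialization. The two-process gloo harness, the embedding sizes, and the use of the default world group in place of a dedicated position-embedding group are illustrative assumptions.

```python
# Hypothetical sketch of "make two pipeline stages start from the same
# position-embedding values" via zero-then-all-reduce. Not Megatron-LM code.
import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def run(rank: int, world_size: int) -> None:
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    # Each rank builds and randomly initializes its own copy of the table.
    # Different seeds make the initial values deliberately diverge.
    torch.manual_seed(rank)
    position_embeddings = torch.nn.Embedding(num_embeddings=16, embedding_dim=8)

    # The rank acting as the decoder split stage zeros its copy so the
    # all-reduce below effectively copies the first stage's weights into it.
    if rank != 0:
        with torch.no_grad():
            position_embeddings.weight.zero_()

    # Sum across the group (here the default world group stands in for a
    # dedicated position-embedding group): rank 0 contributes its initialized
    # weights, every other rank contributes zeros, so all copies end up equal.
    dist.all_reduce(position_embeddings.weight.data, op=dist.ReduceOp.SUM)

    print(f"rank {rank} first row: {position_embeddings.weight.data[0, :3]}")
    dist.destroy_process_group()


if __name__ == "__main__":
    world_size = 2
    mp.spawn(run, args=(world_size,), nprocs=world_size)
```

Zeroing one copy and summing is equivalent to broadcasting the kept initialization to every member of the group; the visible context lines use the same all-reduce approach to keep the shared word embeddings in sync across pipeline stages.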