"src/vscode:/vscode.git/clone" did not exist on "896fb6d8d7c10001eb2a92568be7b4bd3d5ddea3"
Commit c04c4977 authored by Lawrence McAfee

renamed argument; 'embed' -> 'embedding'

parent b93bef00
@@ -68,7 +68,7 @@ def parse_args(extra_args_provider=None, defaults={},
         (args.world_size // args.tensor_model_parallel_size))
     args.transformer_pipeline_model_parallel_size = (
         args.pipeline_model_parallel_size - 1
-        if args.standalone_embed_stage else
+        if args.standalone_embedding_stage else
         args.pipeline_model_parallel_size
     )
     # Checks.
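
Note: for concreteness, a hedged sketch of the arithmetic in the hunk above (the values below are illustrative only, not from this commit):

    pipeline_model_parallel_size = 4   # illustrative value
    standalone_embedding_stage = True  # set via the renamed --standalone-embedding-stage flag

    # Mirrors the expression above: with a standalone embedding stage, one
    # pipeline rank carries no transformer layers, so the "transformer"
    # pipeline size is one smaller than the full pipeline size.
    transformer_pipeline_model_parallel_size = (
        pipeline_model_parallel_size - 1
        if standalone_embedding_stage else
        pipeline_model_parallel_size
    )
    assert transformer_pipeline_model_parallel_size == 3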
@@ -689,7 +689,7 @@ def _add_distributed_args(parser):
                        help='Call torch.cuda.empty_cache() each iteration '
                        '(training and eval), to reduce fragmentation.'
                        '0=off, 1=moderate, 2=aggressive.')
-    group.add_argument('--standalone-embed-stage', action='store_true',
+    group.add_argument('--standalone-embedding-stage', action='store_true',
                        default=False, help='If set, *input* embedding layer '
                        'is placed on its own pipeline stage, without any '
                        'transformer layers. (For T5, this flag currently only '
......
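
Note: since the old spelling is removed rather than aliased, launch scripts need the new flag name. A minimal argparse sketch (the parser setup here is illustrative, not Megatron-LM's) showing how the renamed flag maps onto the attribute used elsewhere in this diff:

    import argparse

    # Stand-in parser; only the renamed flag is registered here.
    parser = argparse.ArgumentParser()
    parser.add_argument('--standalone-embedding-stage', action='store_true',
                        default=False,
                        help='Place the input embedding on its own pipeline stage.')

    # argparse converts dashes to underscores, which is why the rest of the
    # diff reads args.standalone_embedding_stage.
    args = parser.parse_args(['--standalone-embedding-stage'])
    assert args.standalone_embedding_stage is True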
@@ -546,7 +546,7 @@ class NoopTransformerLayer(MegatronModule):
     """A single 'no-op' transformer layer.

     The sole purpose of this layer is for when a standalone embedding layer
-    is used (i.e., args.standalone_embed_stage == True). In this case,
+    is used (i.e., args.standalone_embedding_stage == True). In this case,
     zero transformer layers are assigned when pipeline rank == 0. Additionally,
     when virtual pipeline rank >= 1, zero total model parameters are created
     (virtual rank 0 contains the input embedding). This results in the model's
@@ -635,7 +635,7 @@ class ParallelTransformer(MegatronModule):
         if self.num_layers == 0:
             # When a standalone embedding stage is used (e.g.,
-            # args.standalone_embed_stage == True), virtual pipeline ranks
+            # args.standalone_embedding_stage == True), virtual pipeline ranks
             # on pipeline rank 0 will have zero transformer layers assigned to
             # them. This results in the model's input and output tensors to be
             # the same, which will cause failure for certain output tensor
......
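
Note: a minimal, hedged sketch (illustrative names, not the Megatron-LM implementation) of the guard the two hunks above describe: a parameter-free layer whose only job is to return a tensor distinct from its input, so a zero-layer stage still has separate input and output tensors:

    import torch

    class _NoopLayerSketch(torch.nn.Module):
        # Illustrative stand-in for a 'no-op' transformer layer: no parameters,
        # no computation, just break aliasing between the stage's input and output.
        def forward(self, hidden_states, *unused_args, **unused_kwargs):
            return hidden_states.clone()

    num_layers = 0  # e.g., a virtual rank on pipeline rank 0 with a standalone embedding stage
    if num_layers == 0:
        num_layers = 1
        layers = torch.nn.ModuleList([_NoopLayerSketch()])

    out = layers[0](torch.ones(2, 3))
    assert out.shape == (2, 3)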
@@ -330,7 +330,7 @@ def get_num_layers(args, is_encoder_and_decoder_model):
         # the same whether or not a standalone embedding stage is used.
         num_ranks_in_encoder = (
             args.pipeline_model_parallel_split_rank - 1
-            if args.standalone_embed_stage else
+            if args.standalone_embedding_stage else
             args.pipeline_model_parallel_split_rank
         )
         num_ranks_in_decoder = args.transformer_pipeline_model_parallel_size - num_ranks_in_encoder
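
Note: a worked example of the encoder/decoder rank split above (numbers are illustrative only):

    pipeline_model_parallel_split_rank = 4          # illustrative encoder/decoder split point
    transformer_pipeline_model_parallel_size = 7    # e.g., 8 pipeline ranks minus the embedding stage
    standalone_embedding_stage = True

    # With the embedding on its own stage, one of the ranks before the split
    # point holds no transformer layers, so the encoder gets one fewer rank.
    num_ranks_in_encoder = (
        pipeline_model_parallel_split_rank - 1
        if standalone_embedding_stage else
        pipeline_model_parallel_split_rank
    )
    num_ranks_in_decoder = transformer_pipeline_model_parallel_size - num_ranks_in_encoder
    assert (num_ranks_in_encoder, num_ranks_in_decoder) == (3, 4)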
@@ -352,7 +352,7 @@ def get_num_layers(args, is_encoder_and_decoder_model):
         # or no layers at all (virtual pp rank >= 1).
         num_layers = (
             0
-            if args.standalone_embed_stage
+            if args.standalone_embedding_stage
             and get_pipeline_model_parallel_rank() == 0 else
             args.num_layers // args.transformer_pipeline_model_parallel_size
         )
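
Note: a worked example of the per-rank layer count above (illustrative values; the helper below stands in for get_pipeline_model_parallel_rank()):

    total_num_layers = 24                           # illustrative
    transformer_pipeline_model_parallel_size = 3    # e.g., 4 pipeline ranks minus the embedding stage
    standalone_embedding_stage = True

    def layers_on_rank(pipeline_rank):
        # Mirrors the expression above: pipeline rank 0 holds only the input
        # embedding (zero transformer layers); the remaining ranks split the
        # rest evenly.
        return (
            0
            if standalone_embedding_stage and pipeline_rank == 0 else
            total_num_layers // transformer_pipeline_model_parallel_size
        )

    assert [layers_on_rank(r) for r in range(4)] == [0, 8, 8, 8]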
......