Commit 72e4230b authored by Deepak Narayanan

Turn scatter-gather optimization on by default

parent dcef9069
@@ -578,8 +578,9 @@ def _add_distributed_args(parser):
                        choices=['local', 'torch'],
                        help='which DistributedDataParallel implementation '
                        'to use.')
-    group.add_argument('--scatter-gather-tensors-in-pipeline', action='store_true',
-                       help='Use scatter/gather to optimize communication of tensors in pipeline')
+    group.add_argument('--no-scatter-gather-tensors-in-pipeline', action='store_false',
+                       help='Use scatter/gather to optimize communication of tensors in pipeline',
+                       dest='scatter_gather_tensors_in_pipeline')
     group.add_argument('--local_rank', type=int, default=None,
                        help='local rank passed from distributed launcher.')
     group.add_argument('--lazy-mpu-init', type=bool, required=False,
...
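
For context, the change flips the flag's polarity using a standard argparse idiom: an opt-out '--no-...' flag with action='store_false' makes the destination attribute default to True, and dest= keeps the attribute name the rest of the code already reads, so the scatter/gather optimization becomes on by default. A minimal, self-contained sketch (not the Megatron-LM arguments file itself) illustrating the behavior:

    # Sketch of the argparse pattern used in this commit: a '--no-<feature>'
    # flag with action='store_false' writes into the positive dest, so the
    # feature defaults to enabled unless the user explicitly opts out.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--no-scatter-gather-tensors-in-pipeline',
                        action='store_false',
                        dest='scatter_gather_tensors_in_pipeline',
                        help='Disable scatter/gather optimization of '
                             'pipeline tensor communication.')

    print(parser.parse_args([]))
    # Namespace(scatter_gather_tensors_in_pipeline=True)   <- default is now on
    print(parser.parse_args(['--no-scatter-gather-tensors-in-pipeline']))
    # Namespace(scatter_gather_tensors_in_pipeline=False)  <- explicit opt-out

Callers that previously passed --scatter-gather-tensors-in-pipeline can simply drop the flag; only users who want the old default-off behavior need to pass the new --no- flag.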