Commit e30ad67e authored by Lawrence McAfee

cleaned arguments.py.

parent 14e60427
@@ -135,11 +135,9 @@ def parse_args(extra_args_provider=None, defaults={},
               args.global_batch_size), flush=True)
     assert args.global_batch_size > 0
     if args.num_layers_per_virtual_pipeline_stage is not None:
-        # >>> [ temporarily turning off ]
-        # assert args.pipeline_model_parallel_size > 2, \
-        #     'pipeline-model-parallel size should be greater than 2 with ' \
-        #     'interleaved schedule'
-        # <<<
+        assert args.pipeline_model_parallel_size > 2, \
+            'pipeline-model-parallel size should be greater than 2 with ' \
+            'interleaved schedule'
         assert args.num_layers % args.num_layers_per_virtual_pipeline_stage == 0, \
             'number of layers is not divisible by number of layers per virtual ' \
             'pipeline stage'
@@ -183,13 +181,11 @@ def parse_args(extra_args_provider=None, defaults={},
                   'gradient accumulation. Setting gradient_accumulation_fusion '
                   'to False', flush=True)
 
-    # >>>
     # If we use the distributed optimizer, we need to have local DDP
     # and we should make sure use-contiguous-buffers-in-local-ddp is on.
     if args.use_distributed_optimizer:
         assert args.DDP_impl == 'local'
         assert args.use_contiguous_buffers_in_local_ddp
-    # <<<
 
     # For torch DDP, we do not use contiguous buffer
     if args.DDP_impl == 'torch':
...
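For context, here is a minimal standalone sketch (not the full Megatron-LM parse_args) of the two argument checks this diff touches: the restored interleaved-schedule assertions and the distributed-optimizer requirements. The validate_args helper and the hand-built Namespace are illustrative only; the attribute names come from the diff above.

# Sketch only: reproduces the checks from the diff against a simplified
# argparse.Namespace instead of Megatron-LM's real argument parser.
from argparse import Namespace

def validate_args(args):
    # Interleaved (virtual) pipeline schedule: needs more than 2 pipeline
    # stages, and the layer count must split evenly across virtual stages.
    if args.num_layers_per_virtual_pipeline_stage is not None:
        assert args.pipeline_model_parallel_size > 2, \
            'pipeline-model-parallel size should be greater than 2 with ' \
            'interleaved schedule'
        assert args.num_layers % args.num_layers_per_virtual_pipeline_stage == 0, \
            'number of layers is not divisible by number of layers per virtual ' \
            'pipeline stage'

    # Distributed optimizer requires local DDP with contiguous buffers.
    if args.use_distributed_optimizer:
        assert args.DDP_impl == 'local'
        assert args.use_contiguous_buffers_in_local_ddp

# Example: 24 layers over 4 pipeline stages, 2 layers per virtual stage,
# distributed optimizer with local DDP and contiguous buffers -> passes.
validate_args(Namespace(
    num_layers=24,
    num_layers_per_virtual_pipeline_stage=2,
    pipeline_model_parallel_size=4,
    use_distributed_optimizer=True,
    DDP_impl='local',
    use_contiguous_buffers_in_local_ddp=True,
))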