Commit be842037 authored by Boris Fomitchev

Removed assert in case of external DDP, exported initialize_megatron()


Signed-off-by: Boris Fomitchev <bfomitchev@nvidia.com>
parent 323c124d
@@ -30,7 +30,7 @@ from .global_vars import get_tokenizer
 from .global_vars import get_tensorboard_writer
 from .global_vars import get_adlr_autoresume
 from .global_vars import get_timers
+from .initialize import initialize_megatron

 def print_rank_0(message):
     """If distributed is initialized print only on rank 0."""
@@ -93,11 +93,6 @@ def _initialize_distributed():
                   'skipping initialization ...', flush=True)
         args.rank = torch.distributed.get_rank()
         args.world_size = torch.distributed.get_world_size()
-        if device_count > 0:
-            device = torch.cuda.current_device()
-            local_rank = args.rank % device_count
-            assert local_rank == device, \
-                'expected local-rank to be the same as rank % device-count.'
     else:
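For context, the sketch below shows how an externally launched DDP process might use the newly exported initialize_megatron(). This is a hypothetical usage example, not part of the commit; it assumes the first hunk lands in the top-level megatron package (so the symbol is importable as megatron.initialize_megatron), that an external launcher such as torchrun provides the rendezvous environment variables, and that Megatron's own model/tokenizer command-line arguments are supplied as usual.

# Hypothetical usage sketch -- not part of this commit.
# Assumptions: the exported symbol is reachable as `megatron.initialize_megatron`,
# an external launcher sets RANK/WORLD_SIZE/MASTER_ADDR/MASTER_PORT, and
# Megatron's required command-line arguments are present in sys.argv.
import torch.distributed as dist

from megatron import initialize_megatron


def main():
    # External DDP setup: the process group is created before Megatron runs,
    # possibly with a device-to-rank mapping that differs from
    # rank % torch.cuda.device_count().
    if not dist.is_initialized():
        dist.init_process_group(backend="nccl")

    # Because torch.distributed is already initialized, _initialize_distributed()
    # only reads rank and world_size back; with this commit it no longer asserts
    # that the current CUDA device equals rank % device_count.
    initialize_megatron()


if __name__ == "__main__":
    main()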