Commit 6e433055 authored by mohammad
Browse files

fix for nemo: do not initialize mpu if it is already initialized

parent c63906a6
@@ -123,7 +123,10 @@ def _initialize_distributed():
     # Set the model-parallel / data-parallel communicators.
     if device_count > 0:
-        mpu.initialize_model_parallel(args.model_parallel_size)
+        if mpu.model_parallel_is_initialized():
+            print('model parallel is already initialized')
+        else:
+            mpu.initialize_model_parallel(args.model_parallel_size)
 def _init_autoresume():
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment