Unverified commit 8ecaf2e8, authored by Boyuan Deng, committed by GitHub
Browse files

[Enhancement] Set environment variables in _init_dist_mpi (#1682)



* [Enhancement] Initialize 'mpi' launcher from OpenMPI's environmental variables

* raise exception when no MASTER_ADDR
Co-authored-by: jzwang <841713301@qq.com>
parent 887d9a0f
...
@@ -64,10 +64,15 @@ def _init_dist_pytorch(backend, **kwargs):
 def _init_dist_mpi(backend, **kwargs):
-    # TODO: use local_rank instead of rank % num_gpus
-    rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
-    num_gpus = torch.cuda.device_count()
-    torch.cuda.set_device(rank % num_gpus)
+    local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
+    torch.cuda.set_device(local_rank)
+    if 'MASTER_PORT' not in os.environ:
+        # 29500 is torch.distributed default port
+        os.environ['MASTER_PORT'] = '29500'
+    if 'MASTER_ADDR' not in os.environ:
+        raise KeyError('The environment variable MASTER_ADDR is not set')
+    os.environ['WORLD_SIZE'] = os.environ['OMPI_COMM_WORLD_SIZE']
+    os.environ['RANK'] = os.environ['OMPI_COMM_WORLD_RANK']
     dist.init_process_group(backend=backend, **kwargs)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment