Unverified Commit ae466a87 authored by Antoni Baum, committed by GitHub

fix(server): Do not init process group if already initialized (#388)

parent aefde28b
@@ -2,6 +2,7 @@ import os
 import torch
 
 from datetime import timedelta
+from loguru import logger
 
 
 class FakeBarrier:
@@ -59,6 +60,8 @@ def initialize_torch_distributed():
     else:
         if os.getenv("DEBUG", None) == "1":
             return FakeGroup(rank, world_size), rank, world_size
+
+        if not torch.distributed.is_initialized():
             # Call the init process.
             torch.distributed.init_process_group(
                 backend=backend,
@@ -67,5 +70,7 @@ def initialize_torch_distributed():
                 timeout=timedelta(seconds=60),
                 pg_options=options,
             )
+        else:
+            logger.warning("torch.distributed is already initialized.")
 
         return torch.distributed.group.WORLD, rank, world_size
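For reference, the guard this commit introduces boils down to the sketch below. It is a minimal illustration, not the project's actual `initialize_torch_distributed`: the helper name `init_distributed_once` is made up, the `pg_options`/NCCL handling from the real function is omitted, and it assumes the usual launcher environment variables (`RANK`, `WORLD_SIZE`, `MASTER_ADDR`, `MASTER_PORT`). The guard matters because a second `init_process_group()` call raises a RuntimeError, so code that may run inside an application that has already initialized torch.distributed must reuse the existing default group instead of re-creating it.

```python
import os
from datetime import timedelta

import torch
from loguru import logger


def init_distributed_once(backend: str = "gloo"):
    """Create the default process group only if no one else has (hypothetical helper)."""
    rank = int(os.getenv("RANK", "0"))
    world_size = int(os.getenv("WORLD_SIZE", "1"))

    if not torch.distributed.is_initialized():
        # First caller wins: set up the default process group.
        # This uses the default env:// rendezvous, so MASTER_ADDR and
        # MASTER_PORT must also be set (torchrun exports all four variables).
        torch.distributed.init_process_group(
            backend=backend,
            world_size=world_size,
            rank=rank,
            timeout=timedelta(seconds=60),
        )
    else:
        # Calling init_process_group() a second time raises a RuntimeError,
        # so reuse the existing group and just warn, as the diff above does.
        logger.warning("torch.distributed is already initialized.")

    return torch.distributed.group.WORLD, rank, world_size
```

Returning `torch.distributed.group.WORLD` in both branches keeps the call site identical whether this process created the group or inherited one.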