Commit f3e57f6f authored by Sangkug Lym

Remove the NCCL high-priority stream used for overlapping all-reduce and GEMM

parent 6e1bde1e
@@ -176,22 +176,11 @@ def _initialize_distributed():
             else:
                 args.local_rank = device
             torch.cuda.set_device(device)
-        # Increase cuda stream priority of NCCL ops when overlapping with other ops
-        if (not args.no_async_tensor_model_parallel_allreduce and
-                args.tensor_model_parallel_size > 1):
-            from torch._C._distributed_c10d import ProcessGroupNCCL
-            pg_options = ProcessGroupNCCL.Options()
-            pg_options.is_high_priority_stream = True
-            pg_options._timeout = timedelta(days=7)
-        else:
-            pg_options = None
-        # Call the init process
-        torch.distributed.init_process_group(
-            backend=args.distributed_backend,
-            world_size=args.world_size, rank=args.rank,
-            timeout=timedelta(days=7),
-            pg_options=pg_options)
+        # Call the init process
+        torch.distributed.init_process_group(
+            backend=args.distributed_backend,
+            world_size=args.world_size, rank=args.rank,
+            timeout=timedelta(days=7))
     # Set the tensor model-parallel, pipeline model-parallel, and
     # data-parallel communicators.
...
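For reference, the deleted block configured the NCCL process group to run its collectives on a high-priority CUDA stream, so that the asynchronous tensor-model-parallel all-reduce could overlap with concurrently running GEMM kernels. Below is a minimal sketch of that pattern, mirroring the removed lines; the wrapper function name is illustrative, args carries the same Megatron-style fields used above, and ProcessGroupNCCL comes from PyTorch's private torch._C._distributed_c10d module, exactly as in the original code.

    from datetime import timedelta

    import torch
    # Internal PyTorch module, used the same way as in the removed lines.
    from torch._C._distributed_c10d import ProcessGroupNCCL


    def init_distributed_with_high_priority_nccl(args, device):
        # Illustrative wrapper around the deleted initialization path.
        torch.cuda.set_device(device)

        # Raise the CUDA stream priority of NCCL ops only when the async
        # tensor-model-parallel all-reduce can actually overlap with GEMMs.
        if (not args.no_async_tensor_model_parallel_allreduce and
                args.tensor_model_parallel_size > 1):
            pg_options = ProcessGroupNCCL.Options()
            pg_options.is_high_priority_stream = True
            pg_options._timeout = timedelta(days=7)
        else:
            pg_options = None

        torch.distributed.init_process_group(
            backend=args.distributed_backend,
            world_size=args.world_size, rank=args.rank,
            timeout=timedelta(days=7),
            pg_options=pg_options)

The commit drops this branch and always calls torch.distributed.init_process_group without pg_options, so NCCL collectives fall back to the default-priority stream.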