Unverified commit 01d66ae2, authored by Lianmin Zheng, committed by GitHub
Browse files

Fix multi-node deadlock (#709)

parent a523a3c1
...@@ -187,6 +187,7 @@ def launch_server( ...@@ -187,6 +187,7 @@ def launch_server(
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["NCCL_CUMEM_ENABLE"] = "0" os.environ["NCCL_CUMEM_ENABLE"] = "0"
os.environ["NCCL_NVLS_ENABLE"] = "0" os.environ["NCCL_NVLS_ENABLE"] = "0"
os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
set_ulimit() set_ulimit()
if server_args.show_time_cost: if server_args.show_time_cost:
enable_show_time_cost() enable_show_time_cost()
......
...@@ -312,6 +312,9 @@ def suppress_other_loggers(): ...@@ -312,6 +312,9 @@ def suppress_other_loggers():
logging.getLogger("vllm.distributed.device_communicators.pynccl").setLevel( logging.getLogger("vllm.distributed.device_communicators.pynccl").setLevel(
logging.WARN logging.WARN
) )
logging.getLogger("vllm.distributed.device_communicators.shm_broadcast").setLevel(
logging.WARN
)
logging.getLogger("vllm.selector").setLevel(logging.WARN) logging.getLogger("vllm.selector").setLevel(logging.WARN)
logging.getLogger("vllm.utils").setLevel(logging.WARN) logging.getLogger("vllm.utils").setLevel(logging.WARN)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment