Unverified commit 30ad1070, authored by fzyzcjy and committed by GitHub
Browse files

Try to allow NCCL cumem for multi node nvlink case (#11987)

parent a80bcb5a
...@@ -677,6 +677,7 @@ class Engine(EngineBase): ...@@ -677,6 +677,7 @@ class Engine(EngineBase):
def _set_envs_and_config(server_args: ServerArgs): def _set_envs_and_config(server_args: ServerArgs):
# Set global environments # Set global environments
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
if "NCCL_CUMEM_ENABLE" not in os.environ:
os.environ["NCCL_CUMEM_ENABLE"] = str(int(server_args.enable_symm_mem)) os.environ["NCCL_CUMEM_ENABLE"] = str(int(server_args.enable_symm_mem))
if not server_args.enable_symm_mem: if not server_args.enable_symm_mem:
os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls)) os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment