"tests/vscode:/vscode.git/clone" did not exist on "e49dca6efdb32b094fa8f7db89e5943aa64f13c8"
Unverified commit b8318aec, authored by Ata Fatahi and committed by GitHub
Browse files

Make NCCL NVLS configurable (#3502)

parent 2f482210
...@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs): ...@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs):
# Set global environments # Set global environments
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["NCCL_CUMEM_ENABLE"] = "0" os.environ["NCCL_CUMEM_ENABLE"] = "0"
os.environ["NCCL_NVLS_ENABLE"] = "0" os.environ["NCCL_NVLS_ENABLE"] = str(int(server_args.enable_nccl_nvls))
os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1" os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4" os.environ["CUDA_DEVICE_MAX_CONNECTIONS"] = "4"
......
...@@ -140,6 +140,7 @@ class ServerArgs: ...@@ -140,6 +140,7 @@ class ServerArgs:
disable_jump_forward: bool = False disable_jump_forward: bool = False
disable_cuda_graph: bool = False disable_cuda_graph: bool = False
disable_cuda_graph_padding: bool = False disable_cuda_graph_padding: bool = False
enable_nccl_nvls: bool = False
disable_outlines_disk_cache: bool = False disable_outlines_disk_cache: bool = False
disable_custom_all_reduce: bool = False disable_custom_all_reduce: bool = False
disable_mla: bool = False disable_mla: bool = False
...@@ -783,6 +784,11 @@ class ServerArgs: ...@@ -783,6 +784,11 @@ class ServerArgs:
action="store_true", action="store_true",
help="Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed.", help="Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed.",
) )
parser.add_argument(
"--enable-nccl-nvls",
action="store_true",
help="Enable NCCL NVLS for prefill heavy requests when available.",
)
parser.add_argument( parser.add_argument(
"--disable-outlines-disk-cache", "--disable-outlines-disk-cache",
action="store_true", action="store_true",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment