Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b8318aec
"tests/vscode:/vscode.git/clone" did not exist on "e49dca6efdb32b094fa8f7db89e5943aa64f13c8"
Unverified
Commit
b8318aec
authored
Feb 11, 2025
by
Ata Fatahi
Committed by
GitHub
Feb 12, 2025
Browse files
Make NCCL NVLS configurable (#3502)
parent
2f482210
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
1 deletion
+7
-1
python/sglang/srt/entrypoints/engine.py
python/sglang/srt/entrypoints/engine.py
+1
-1
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-0
No files found.
python/sglang/srt/entrypoints/engine.py
View file @
b8318aec
...
@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs):
...
@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs):
# Set global environments
# Set global environments
os
.
environ
[
"TF_CPP_MIN_LOG_LEVEL"
]
=
"3"
os
.
environ
[
"TF_CPP_MIN_LOG_LEVEL"
]
=
"3"
os
.
environ
[
"NCCL_CUMEM_ENABLE"
]
=
"0"
os
.
environ
[
"NCCL_CUMEM_ENABLE"
]
=
"0"
os
.
environ
[
"NCCL_NVLS_ENABLE"
]
=
"0"
os
.
environ
[
"NCCL_NVLS_ENABLE"
]
=
str
(
int
(
server_args
.
enable_nccl_nvls
))
os
.
environ
[
"TORCH_NCCL_AVOID_RECORD_STREAMS"
]
=
"1"
os
.
environ
[
"TORCH_NCCL_AVOID_RECORD_STREAMS"
]
=
"1"
os
.
environ
[
"CUDA_DEVICE_MAX_CONNECTIONS"
]
=
"4"
os
.
environ
[
"CUDA_DEVICE_MAX_CONNECTIONS"
]
=
"4"
...
...
python/sglang/srt/server_args.py
View file @
b8318aec
...
@@ -140,6 +140,7 @@ class ServerArgs:
...
@@ -140,6 +140,7 @@ class ServerArgs:
disable_jump_forward
:
bool
=
False
disable_jump_forward
:
bool
=
False
disable_cuda_graph
:
bool
=
False
disable_cuda_graph
:
bool
=
False
disable_cuda_graph_padding
:
bool
=
False
disable_cuda_graph_padding
:
bool
=
False
enable_nccl_nvls
:
bool
=
False
disable_outlines_disk_cache
:
bool
=
False
disable_outlines_disk_cache
:
bool
=
False
disable_custom_all_reduce
:
bool
=
False
disable_custom_all_reduce
:
bool
=
False
disable_mla
:
bool
=
False
disable_mla
:
bool
=
False
...
@@ -783,6 +784,11 @@ class ServerArgs:
...
@@ -783,6 +784,11 @@ class ServerArgs:
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed."
,
help
=
"Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed."
,
)
)
parser
.
add_argument
(
"--enable-nccl-nvls"
,
action
=
"store_true"
,
help
=
"Enable NCCL NVLS for prefill heavy requests when available."
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--disable-outlines-disk-cache"
,
"--disable-outlines-disk-cache"
,
action
=
"store_true"
,
action
=
"store_true"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment