Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b8318aec
Unverified
Commit
b8318aec
authored
Feb 11, 2025
by
Ata Fatahi
Committed by
GitHub
Feb 12, 2025
Browse files
Make NCCL NVLS configurable (#3502)
parent
2f482210
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
1 deletion
+7
-1
python/sglang/srt/entrypoints/engine.py
python/sglang/srt/entrypoints/engine.py
+1
-1
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-0
No files found.
python/sglang/srt/entrypoints/engine.py
View file @
b8318aec
...
@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs):
...
@@ -297,7 +297,7 @@ def _set_envs_and_config(server_args: ServerArgs):
# Set global environments
# Set global environments
os
.
environ
[
"TF_CPP_MIN_LOG_LEVEL"
]
=
"3"
os
.
environ
[
"TF_CPP_MIN_LOG_LEVEL"
]
=
"3"
os
.
environ
[
"NCCL_CUMEM_ENABLE"
]
=
"0"
os
.
environ
[
"NCCL_CUMEM_ENABLE"
]
=
"0"
os
.
environ
[
"NCCL_NVLS_ENABLE"
]
=
"0"
os
.
environ
[
"NCCL_NVLS_ENABLE"
]
=
str
(
int
(
server_args
.
enable_nccl_nvls
))
os
.
environ
[
"TORCH_NCCL_AVOID_RECORD_STREAMS"
]
=
"1"
os
.
environ
[
"TORCH_NCCL_AVOID_RECORD_STREAMS"
]
=
"1"
os
.
environ
[
"CUDA_DEVICE_MAX_CONNECTIONS"
]
=
"4"
os
.
environ
[
"CUDA_DEVICE_MAX_CONNECTIONS"
]
=
"4"
...
...
python/sglang/srt/server_args.py
View file @
b8318aec
...
@@ -140,6 +140,7 @@ class ServerArgs:
...
@@ -140,6 +140,7 @@ class ServerArgs:
disable_jump_forward
:
bool
=
False
disable_jump_forward
:
bool
=
False
disable_cuda_graph
:
bool
=
False
disable_cuda_graph
:
bool
=
False
disable_cuda_graph_padding
:
bool
=
False
disable_cuda_graph_padding
:
bool
=
False
enable_nccl_nvls
:
bool
=
False
disable_outlines_disk_cache
:
bool
=
False
disable_outlines_disk_cache
:
bool
=
False
disable_custom_all_reduce
:
bool
=
False
disable_custom_all_reduce
:
bool
=
False
disable_mla
:
bool
=
False
disable_mla
:
bool
=
False
...
@@ -783,6 +784,11 @@ class ServerArgs:
...
@@ -783,6 +784,11 @@ class ServerArgs:
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed."
,
help
=
"Disable cuda graph when padding is needed. Still uses cuda graph when padding is not needed."
,
)
)
parser
.
add_argument
(
"--enable-nccl-nvls"
,
action
=
"store_true"
,
help
=
"Enable NCCL NVLS for prefill heavy requests when available."
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--disable-outlines-disk-cache"
,
"--disable-outlines-disk-cache"
,
action
=
"store_true"
,
action
=
"store_true"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment