Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
cd135bfe
"git@developer.sourcefind.cn:change/sglang.git" did not exist on "66283dbc0c052c6f32bde68451addc5b0d00cf3b"
Unverified
Commit
cd135bfe
authored
Nov 06, 2025
by
Ke Bao
Committed by
GitHub
Nov 06, 2025
Browse files
Update dsv3 quantization auto setting for sm100 (#12778)
parent
fc84b073
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
9 deletions
+22
-9
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+22
-9
No files found.
python/sglang/srt/server_args.py
View file @
cd135bfe
...
...
@@ -912,19 +912,32 @@ class ServerArgs:
logger
.
info
(
"Enable FlashInfer AllReduce Fusion on sm100 for DeepseekV3ForCausalLM"
)
if
self
.
moe_a2a_backend
==
"none"
and
self
.
moe_runner_backend
==
"auto"
:
self
.
moe_runner_backend
=
"flashinfer_trtllm"
logger
.
info
(
"Use flashinfer_trtllm as MoE runner backend on sm100 for DeepseekV3ForCausalLM"
)
if
self
.
quantization
is
None
:
# Default DeepSeek V3/R1 native FP8 when not explicitly set,
# Because we need this condition for an assertion in
# flashinfer_trtllm MoE runner backend.
quantization_config
=
getattr
(
hf_config
,
"quantization_config"
,
None
)
quant_method
=
(
quantization_config
.
get
(
"quant_method"
)
if
quantization_config
is
not
None
else
None
)
if
self
.
quantization
is
None
:
# Default DeepSeek V3/R1 native FP8 when not explicitly set,
# Because we need this condition for an assertion in
# flashinfer_trtllm MoE runner backend.
if
quant_method
is
None
:
self
.
quantization
=
"fp8"
logger
.
info
(
"Quantization not specified, default to fp8 for DeepSeek on sm100"
)
else
:
self
.
quantization
=
quant_method
if
(
self
.
moe_a2a_backend
==
"none"
and
self
.
moe_runner_backend
==
"auto"
and
self
.
quantization
in
[
"fp8"
,
"modelopt_fp8"
,
"modelopt_fp4"
]
):
self
.
moe_runner_backend
=
"flashinfer_trtllm"
logger
.
info
(
"Use flashinfer_trtllm as MoE runner backend on sm100 for DeepseekV3ForCausalLM"
)
elif
model_arch
in
[
"GptOssForCausalLM"
]:
if
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment