sglang · Commit 27985c27 (unverified)
Authored Aug 14, 2025 by Yineng Zhang; committed by GitHub on Aug 14, 2025
Parent: ac474869

feat: update model config (#9202)
Showing 1 changed file with 16 additions and 2 deletions.

python/sglang/srt/server_args.py (+16, -2) @ 27985c27
@@ -24,7 +24,7 @@ import tempfile
 from typing import List, Literal, Optional, Union

 from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
-from sglang.srt.layers.utils import is_sm100_supported
+from sglang.srt.layers.utils import is_sm90_supported, is_sm100_supported
 from sglang.srt.lora.lora_registry import LoRARef
 from sglang.srt.reasoning_parser import ReasoningParser
 from sglang.srt.utils import (
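The only change in this hunk is pulling in is_sm90_supported alongside is_sm100_supported. These helpers gate features on the GPU's compute capability (SM 9.0 is Hopper-class hardware such as H100/H200; SM 10.0 is Blackwell-class). A minimal sketch of what such a gate typically checks, assuming the usual torch.cuda API; the real implementations live in sglang.srt.layers.utils and may additionally check the installed CUDA toolkit version:

import torch

def is_sm90_supported_sketch() -> bool:
    # Sketch only: true on Hopper-class GPUs (compute capability 9.x).
    return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] == 9

def is_sm100_supported_sketch() -> bool:
    # Sketch only: true on Blackwell-class GPUs (compute capability 10.x).
    return torch.cuda.is_available() and torch.cuda.get_device_capability()[0] == 10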
@@ -2117,11 +2117,25 @@ class ServerArgs:
         model_arch = hf_config.architectures[0]
         if model_arch in ["GptOssForCausalLM"]:
             if self.attention_backend is None:
-                self.attention_backend = "triton"
+                if is_sm100_supported():
+                    self.attention_backend = "trtllm_mha"
+                elif is_sm90_supported():
+                    self.attention_backend = "fa3"
+                else:
+                    self.attention_backend = "triton"
             supported_backends = ["triton", "trtllm_mha", "fa3"]
+            logger.info(
+                f"Use {self.attention_backend} as attention backend for GptOssForCausalLM"
+            )
             assert (
                 self.attention_backend in supported_backends
             ), f"GptOssForCausalLM requires one of {supported_backends} attention backend, but got '{self.attention_backend}'"
+            if is_sm100_supported():
+                self.enable_flashinfer_allreduce_fusion = True
+                logger.info(
+                    "Enable FlashInfer AllReduce Fusion on sm100 for GptOssForCausalLM"
+                )
             quantization_config = getattr(hf_config, "quantization_config", None)
             is_mxfp4_quant_format = (
                 quantization_config is not None
...
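The excerpt is cut off mid-expression at quantization_config is not None, so the rest of the is_mxfp4_quant_format condition is not visible here. As a hypothetical sketch only (the commit's actual condition may differ), a quant-format check of this shape usually concludes by inspecting the config's quant method:

# Hypothetical continuation, not the commit's actual code: the excerpt
# ends before the condition completes.
quantization_config = getattr(hf_config, "quantization_config", None)
is_mxfp4_quant_format = (
    quantization_config is not None
    and quantization_config.get("quant_method") == "mxfp4"
)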
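Note that the new SM-aware defaults only apply when no backend was requested: the if self.attention_backend is None guard means an explicit choice still wins, provided it passes the assert against supported_backends. For example, to force the Triton backend regardless of hardware (model path is illustrative):

python3 -m sglang.launch_server --model-path openai/gpt-oss-20b --attention-backend triton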