sglang · Commit c10b8e6a (unverified)

Support DP attention with GPT-OSS (#9359)

Authored Aug 20, 2025 by Nicolas Castet; committed by GitHub on Aug 20, 2025.
Parent: d4bce297
Showing 2 changed files with 6 additions and 5 deletions (+6, -5):

    python/sglang/srt/models/gpt_oss.py   +1, -1
    python/sglang/srt/server_args.py      +5, -4
python/sglang/srt/models/gpt_oss.py

@@ -1091,7 +1091,7 @@ class GptOssForCausalLM(nn.Module):
             if name in params_dict.keys():
                 param = params_dict[name]
                 if "sinks" in name:
-                    start = tp_rank * param.numel()
+                    start = get_attention_tp_rank() * param.numel()
                     param.data.copy_(
                         loaded_weight[start : start + param.numel()]
                     )
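The gpt_oss.py change fixes loading of the per-head attention "sinks" parameter. When DP attention is enabled, sglang shards attention over a smaller attention tensor-parallel group rather than the full TP group, so the slice offset into the checkpoint tensor must come from get_attention_tp_rank() instead of the global tp_rank. A minimal sketch of the offset arithmetic, using a hypothetical shard_sinks helper (not sglang API); only the slice computation mirrors the patched line:

    # Sketch only: shard_sinks is a hypothetical helper; the slice
    # arithmetic mirrors the '+' line in load_weights above.
    import torch

    def shard_sinks(loaded_weight: torch.Tensor, attn_tp_rank: int, attn_tp_size: int) -> torch.Tensor:
        shard = loaded_weight.numel() // attn_tp_size  # heads owned per attention-TP rank
        start = attn_tp_rank * shard                   # offset, as in the patched line
        return loaded_weight[start : start + shard].clone()

    # Example: 8 heads, tp_size=4 arranged as dp=2 x attn_tp=2.
    # A GPU with global tp_rank=3 has attn_tp_rank=1 and must read [4:8];
    # indexing by the global rank (3 * 4 = 12) would run past the tensor.
    sinks = torch.arange(8.0)
    print(shard_sinks(sinks, attn_tp_rank=1, attn_tp_size=2))  # tensor([4., 5., 6., 7.])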
python/sglang/srt/server_args.py

@@ -2183,10 +2183,11 @@ class ServerArgs:
             ), f"GptOssForCausalLM requires one of {supported_backends} attention backend, but got '{self.attention_backend}'"

             if is_sm100_supported():
-                self.enable_flashinfer_allreduce_fusion = True
-                logger.info(
-                    "Enable FlashInfer AllReduce Fusion on sm100 for GptOssForCausalLM"
-                )
+                if not self.enable_dp_attention:
+                    self.enable_flashinfer_allreduce_fusion = True
+                    logger.info(
+                        "Enable FlashInfer AllReduce Fusion on sm100 for GptOssForCausalLM"
+                    )
             quantization_config = getattr(hf_config, "quantization_config", None)
             is_mxfp4_quant_format = (
                 quantization_config is not None
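The server_args.py change gates the SM100 (Blackwell) auto-enable of FlashInfer allreduce fusion on DP attention being off, presumably because the fused allreduce path targets the pure tensor-parallel communication pattern, which changes when attention runs data-parallel. A self-contained sketch of the gating, using stand-in names rather than the real ServerArgs class:

    # Sketch with stand-in names; mirrors the condition added by this commit.
    from dataclasses import dataclass

    @dataclass
    class Args:
        enable_dp_attention: bool = False
        enable_flashinfer_allreduce_fusion: bool = False

    def maybe_enable_fusion(args: Args, is_sm100: bool) -> None:
        # Fusion is auto-enabled only on SM100 and only without DP attention.
        if is_sm100 and not args.enable_dp_attention:
            args.enable_flashinfer_allreduce_fusion = True

    args = Args(enable_dp_attention=True)
    maybe_enable_fusion(args, is_sm100=True)
    assert args.enable_flashinfer_allreduce_fusion is False  # stays off under DP attention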