Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
e06b0761
"docs/vscode:/vscode.git/clone" did not exist on "38077e298b1f4c191e699461bf89deba58fb4449"
Unverified
Commit
e06b0761
authored
May 29, 2025
by
iLeGend
Committed by
GitHub
May 28, 2025
Browse files
Fix PP for Qwen3 MoE (#6709)
parent
844a8f42
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
60 additions
and
4 deletions
+60
-4
python/sglang/srt/models/qwen3_moe.py
python/sglang/srt/models/qwen3_moe.py
+3
-3
test/srt/test_pp_single_node.py
test/srt/test_pp_single_node.py
+57
-1
No files found.
python/sglang/srt/models/qwen3_moe.py
View file @
e06b0761
...
@@ -812,9 +812,9 @@ class Qwen3MoeForCausalLM(nn.Module):
...
@@ -812,9 +812,9 @@ class Qwen3MoeForCausalLM(nn.Module):
logger
.
warning
(
f
"Parameter
{
name
}
not found in params_dict"
)
logger
.
warning
(
f
"Parameter
{
name
}
not found in params_dict"
)
self
.
routed_experts_weights_of_layer
=
{
self
.
routed_experts_weights_of_layer
=
{
layer_id
:
layer
.
mlp
.
get_moe_weights
()
layer_id
:
self
.
model
.
layers
[
layer_id
]
.
mlp
.
get_moe_weights
()
for
layer_id
,
layer
in
enumerate
(
self
.
model
.
layer
s
)
for
layer_id
in
range
(
self
.
start_layer
,
self
.
end_
layer
)
if
isinstance
(
layer
.
mlp
,
Qwen3MoeSparseMoeBlock
)
if
isinstance
(
self
.
model
.
layers
[
layer_id
]
.
mlp
,
Qwen3MoeSparseMoeBlock
)
}
}
@
classmethod
@
classmethod
...
...
test/srt/test_pp_single_node.py
View file @
e06b0761
...
@@ -121,7 +121,7 @@ class TestQwenPPAccuracy(unittest.TestCase):
...
@@ -121,7 +121,7 @@ class TestQwenPPAccuracy(unittest.TestCase):
class
TestQwenPPTieWeightsAccuracy
(
unittest
.
TestCase
):
class
TestQwenPPTieWeightsAccuracy
(
unittest
.
TestCase
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
cls
.
base_url
=
"http://127.0.0.1:2333
4
"
# different ports to avoid conflicts
cls
.
base_url
=
"http://127.0.0.1:2333
5
"
# different ports to avoid conflicts
cls
.
model_name
=
(
cls
.
model_name
=
(
"Qwen/Qwen3-0.6B"
# qwen3 < 8B all have tie_word_embeddings = True
"Qwen/Qwen3-0.6B"
# qwen3 < 8B all have tie_word_embeddings = True
)
)
...
@@ -176,6 +176,62 @@ class TestQwenPPTieWeightsAccuracy(unittest.TestCase):
...
@@ -176,6 +176,62 @@ class TestQwenPPTieWeightsAccuracy(unittest.TestCase):
)
)
class
TestQwenMoePPAccuracy
(
unittest
.
TestCase
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
base_url
=
"http://127.0.0.1:23336"
# different ports to avoid conflicts
cls
.
model_name
=
"Qwen/Qwen3-30B-A3B"
# replace with your Qwen Model if needed
def
run_gsm8k_test
(
self
,
pp_size
):
process
=
popen_launch_server
(
self
.
model_name
,
self
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
other_args
=
[
"--pp-size"
,
pp_size
,
"--chunked-prefill-size"
,
256
,
],
)
try
:
args
=
SimpleNamespace
(
num_shots
=
5
,
data_path
=
None
,
num_questions
=
200
,
max_new_tokens
=
512
,
parallel
=
128
,
host
=
"http://127.0.0.1"
,
port
=
int
(
self
.
base_url
.
split
(
":"
)[
-
1
]),
)
metrics
=
run_eval
(
args
)
time
.
sleep
(
5
)
return
metrics
finally
:
kill_process_tree
(
process
.
pid
)
def
test_baseline_accuracy
(
self
):
metrics
=
self
.
run_gsm8k_test
(
pp_size
=
1
)
print
(
f
"[Qwen Baseline]
{
metrics
=
}
"
)
self
.
assertGreater
(
metrics
[
"accuracy"
],
0.74
)
def
test_pp_consistency
(
self
):
baseline
=
self
.
run_gsm8k_test
(
pp_size
=
1
)
pp_metrics
=
self
.
run_gsm8k_test
(
pp_size
=
2
)
print
(
f
"[Qwen PP Comparison] Baseline:
{
baseline
}
| PP:
{
pp_metrics
}
"
)
self
.
assertGreaterEqual
(
pp_metrics
[
"accuracy"
],
baseline
[
"accuracy"
]
-
0.01
,
msg
=
(
f
"PP accuracy dropped more than 1% compared to baseline. "
f
"Baseline:
{
baseline
[
'accuracy'
]:.
2
%
}
, PP:
{
pp_metrics
[
'accuracy'
]:.
2
%
}
"
),
)
class
TestFixedBugs
(
unittest
.
TestCase
):
class
TestFixedBugs
(
unittest
.
TestCase
):
def
test_chunked_prefill_with_small_bs
(
self
):
def
test_chunked_prefill_with_small_bs
(
self
):
model
=
DEFAULT_MODEL_NAME_FOR_TEST
model
=
DEFAULT_MODEL_NAME_FOR_TEST
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment