Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
13705dae
Unverified
Commit
13705dae
authored
Sep 05, 2025
by
DevashishLal-CB
Committed by
GitHub
Sep 05, 2025
Browse files
[Fix] Add speculative_draft_model_revision to server_args (#5255)
Signed-off-by: Devashish Lal <devashish@rivosinc.com>
parent
df97b31f
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing 13 changed files with 68 additions and 45 deletions
+68
-45
benchmark/gpt_oss/README.md
benchmark/gpt_oss/README.md
+2
-2
benchmark/mtbench/README.md
benchmark/mtbench/README.md
+1
-1
python/sglang/srt/configs/model_config.py
python/sglang/srt/configs/model_config.py
+7
-2
python/sglang/srt/managers/tp_worker.py
python/sglang/srt/managers/tp_worker.py
+5
-0
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+9
-0
python/sglang/test/runners.py
python/sglang/test/runners.py
+4
-0
test/srt/ep/test_deepep_small.py
test/srt/ep/test_deepep_small.py
+2
-2
test/srt/ep/test_hybrid_dp_ep_tp_mtp.py
test/srt/ep/test_hybrid_dp_ep_tp_mtp.py
+30
-30
test/srt/test_dp_attention.py
test/srt/test_dp_attention.py
+1
-1
test/srt/test_fa3.py
test/srt/test_fa3.py
+4
-4
test/srt/test_flashmla.py
test/srt/test_flashmla.py
+1
-1
test/srt/test_hybrid_attn_backend.py
test/srt/test_hybrid_attn_backend.py
+1
-1
test/srt/test_mla_int8_deepseek_v3.py
test/srt/test_mla_int8_deepseek_v3.py
+1
-1
No files found.
benchmark/gpt_oss/README.md
View file @
13705dae
...
...
@@ -132,8 +132,8 @@ python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algori
# On Blackwell:
# - Chain decoding (topk = 1) is supported on TRTLLM-MHA backend. Tree decoding (topk > 1) is in progress, stay tuned!
# - Both tree decoding (topk > 1) and chain decoding (topk = 1) are supported on the Triton backend.
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
```
Benchmark Command
...
...
benchmark/mtbench/README.md
View file @
13705dae
...
...
@@ -18,7 +18,7 @@ python3 bench_sglang.py --num-questions 80
### Benchmark sglang EAGLE
```
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algo EAGLE \
--speculative-draft lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
--speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
--speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --dtype float16 --port 30000
```
...
...
python/sglang/srt/configs/model_config.py
View file @
13705dae
...
...
@@ -302,11 +302,16 @@ class ModelConfig:
)
or
getattr
(
self
.
hf_config
,
"image_token_index"
,
None
)
@
staticmethod
def
from_server_args
(
server_args
:
ServerArgs
,
model_path
:
str
=
None
,
**
kwargs
):
def
from_server_args
(
server_args
:
ServerArgs
,
model_path
:
str
=
None
,
model_revision
:
str
=
None
,
**
kwargs
,
):
return
ModelConfig
(
model_path
=
model_path
or
server_args
.
model_path
,
trust_remote_code
=
server_args
.
trust_remote_code
,
revision
=
server_args
.
revision
,
revision
=
model_revision
or
server_args
.
revision
,
context_length
=
server_args
.
context_length
,
model_override_args
=
server_args
.
json_model_override_args
,
is_embedding
=
server_args
.
is_embedding
,
...
...
python/sglang/srt/managers/tp_worker.py
View file @
13705dae
...
...
@@ -78,6 +78,11 @@ class TpModelWorker:
if
not
is_draft_worker
else
server_args
.
speculative_draft_model_path
),
model_revision
=
(
server_args
.
revision
if
not
is_draft_worker
else
server_args
.
speculative_draft_model_revision
),
is_draft_model
=
is_draft_worker
,
)
...
...
python/sglang/srt/server_args.py
View file @
13705dae
...
...
@@ -249,6 +249,7 @@ class ServerArgs:
# Speculative decoding
speculative_algorithm
:
Optional
[
str
]
=
None
speculative_draft_model_path
:
Optional
[
str
]
=
None
speculative_draft_model_revision
:
Optional
[
str
]
=
None
speculative_num_steps
:
Optional
[
int
]
=
None
speculative_eagle_topk
:
Optional
[
int
]
=
None
speculative_num_draft_tokens
:
Optional
[
int
]
=
None
...
...
@@ -1498,6 +1499,14 @@ class ServerArgs:
type
=
str
,
help
=
"The path of the draft model weights. This can be a local folder or a Hugging Face repo ID."
,
)
parser
.
add_argument
(
"--speculative-draft-model-revision"
,
type
=
str
,
default
=
None
,
help
=
"The specific draft model version to use. It can be a branch "
"name, a tag name, or a commit id. If unspecified, will use "
"the default version."
,
)
parser
.
add_argument
(
"--speculative-num-steps"
,
type
=
int
,
...
...
python/sglang/test/runners.py
View file @
13705dae
...
...
@@ -505,6 +505,7 @@ class SRTRunner:
mem_fraction_static
:
float
=
0.65
,
trust_remote_code
:
bool
=
False
,
speculative_draft_model_path
:
Optional
[
str
]
=
None
,
speculative_draft_model_revision
:
Optional
[
str
]
=
None
,
speculative_algorithm
:
Optional
[
str
]
=
None
,
speculative_num_steps
:
Optional
[
int
]
=
None
,
speculative_eagle_topk
:
Optional
[
int
]
=
None
,
...
...
@@ -526,6 +527,9 @@ class SRTRunner:
spec_kwargs
=
{}
if
speculative_draft_model_path
:
spec_kwargs
[
"speculative_draft_model_path"
]
=
speculative_draft_model_path
spec_kwargs
[
"speculative_draft_model_revision"
]
=
(
speculative_draft_model_revision
)
spec_kwargs
[
"speculative_algorithm"
]
=
speculative_algorithm
spec_kwargs
[
"speculative_num_steps"
]
=
speculative_num_steps
spec_kwargs
[
"speculative_eagle_topk"
]
=
speculative_eagle_topk
...
...
test/srt/ep/test_deepep_small.py
View file @
13705dae
...
...
@@ -268,7 +268,7 @@ class TestMTP(CustomTestCase):
"deepep"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -343,7 +343,7 @@ class TestMTPWithTBO(CustomTestCase):
"3"
,
"--speculative-num-draft-tokens"
,
"3"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN
,
"--chunked-prefill-size"
,
"256"
,
...
...
test/srt/ep/test_hybrid_dp_ep_tp_mtp.py
View file @
13705dae
...
...
@@ -1225,7 +1225,7 @@ class Test30(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1272,7 +1272,7 @@ class Test31(CustomTestCase):
"4"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1319,7 +1319,7 @@ class Test32(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1365,7 +1365,7 @@ class Test33(CustomTestCase):
"1"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1414,7 +1414,7 @@ class Test34(CustomTestCase):
"1"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1463,7 +1463,7 @@ class Test35(CustomTestCase):
"1"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1511,7 +1511,7 @@ class Test36(CustomTestCase):
"--enable-dp-lm-head"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1559,7 +1559,7 @@ class Test37(CustomTestCase):
"--enable-dp-lm-head"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1609,7 +1609,7 @@ class Test38(CustomTestCase):
"--enable-dp-lm-head"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1659,7 +1659,7 @@ class Test39(CustomTestCase):
"--enable-dp-lm-head"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1709,7 +1709,7 @@ class Test40(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1762,7 +1762,7 @@ class Test41(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1815,7 +1815,7 @@ class Test42(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1867,7 +1867,7 @@ class Test43(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1922,7 +1922,7 @@ class Test44(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -1977,7 +1977,7 @@ class Test45(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2031,7 +2031,7 @@ class Test46(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2085,7 +2085,7 @@ class Test47(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2141,7 +2141,7 @@ class Test48(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2197,7 +2197,7 @@ class Test49(CustomTestCase):
"32"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2243,7 +2243,7 @@ class Test50(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2292,7 +2292,7 @@ class Test51(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2341,7 +2341,7 @@ class Test52(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2389,7 +2389,7 @@ class Test53(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2440,7 +2440,7 @@ class Test54(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2491,7 +2491,7 @@ class Test55(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2541,7 +2541,7 @@ class Test56(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2591,7 +2591,7 @@ class Test57(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2643,7 +2643,7 @@ class Test58(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
@@ -2695,7 +2695,7 @@ class Test59(CustomTestCase):
"8"
,
"--speculative-algo"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/DeepSeek-V3-0324-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
test/srt/test_dp_attention.py
View file @
13705dae
...
...
@@ -74,7 +74,7 @@ class TestDPAttentionDP2TP2DeepseekV3MTP(CustomTestCase):
"4"
,
"--speculative-num-draft-tokens"
,
"4"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN
,
"--tp-size"
,
"2"
,
...
...
test/srt/test_fa3.py
View file @
13705dae
...
...
@@ -146,7 +146,7 @@ class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest):
"4"
,
"--speculative-algorithm"
,
"EAGLE3"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3
,
"--speculative-num-steps"
,
"3"
,
...
...
@@ -180,7 +180,7 @@ class TestFlashAttention3SpeculativeDecodeTopk(BaseFlashAttentionTest):
"4"
,
"--speculative-algorithm"
,
"EAGLE3"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3
,
"--speculative-num-steps"
,
"5"
,
...
...
@@ -212,7 +212,7 @@ class TestFlashAttention3MLASpeculativeDecode(BaseFlashAttentionTest):
"4"
,
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN
,
"--speculative-num-steps"
,
"3"
,
...
...
@@ -244,7 +244,7 @@ class TestFlashAttention3MLASpeculativeDecodeTopk(BaseFlashAttentionTest):
"4"
,
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN
,
"--speculative-num-steps"
,
"5"
,
...
...
test/srt/test_flashmla.py
View file @
13705dae
...
...
@@ -100,7 +100,7 @@ class TestFlashMLAMTP(CustomTestCase):
"1"
,
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"lmsys/sglang-ci-dsv3-test-NextN"
,
"--speculative-num-steps"
,
"1"
,
...
...
test/srt/test_hybrid_attn_backend.py
View file @
13705dae
...
...
@@ -121,7 +121,7 @@ class TestHybridAttnBackendSpeculativeDecoding(TestHybridAttnBackendBase):
return
DEFAULT_SERVER_ARGS
+
[
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST
,
"--speculative-num-steps"
,
"3"
,
...
...
test/srt/test_mla_int8_deepseek_v3.py
View file @
13705dae
...
...
@@ -67,7 +67,7 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
"1"
,
"--speculative-algorithm"
,
"EAGLE"
,
"--speculative-draft"
,
"--speculative-draft
-model-path
"
,
"sgl-project/sglang-ci-dsv3-channel-int8-test-NextN"
,
"--speculative-num-steps"
,
"2"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment