Unverified commit 13705dae, authored by DevashishLal-CB, committed by GitHub
Browse files

[Fix] Add speculative_draft_model_revision to server_args (#5255)


Signed-off-by: Devashish Lal <devashish@rivosinc.com>
parent df97b31f
...@@ -132,8 +132,8 @@ python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algori ...@@ -132,8 +132,8 @@ python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algori
# On Blackwell: # On Blackwell:
# - Chain decoding (topk = 1) is supported on TRTLLM-MHA backend. Tree decoding (topk > 1) is in progress, stay tuned! # - Chain decoding (topk = 1) is supported on TRTLLM-MHA backend. Tree decoding (topk > 1) is in progress, stay tuned!
# - Both tree decoding (topk > 1) and chain decoding (topk = 1) are supported on the Triton backend. # - Both tree decoding (topk > 1) and chain decoding (topk = 1) are supported on the Triton backend.
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4 python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4 python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
``` ```
Benchmark Command Benchmark Command
......
...@@ -18,7 +18,7 @@ python3 bench_sglang.py --num-questions 80 ...@@ -18,7 +18,7 @@ python3 bench_sglang.py --num-questions 80
### Benchmark sglang EAGLE ### Benchmark sglang EAGLE
``` ```
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algo EAGLE \ python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algo EAGLE \
--speculative-draft lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \ --speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
--speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --dtype float16 --port 30000 --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --dtype float16 --port 30000
``` ```
......
...@@ -302,11 +302,16 @@ class ModelConfig: ...@@ -302,11 +302,16 @@ class ModelConfig:
) or getattr(self.hf_config, "image_token_index", None) ) or getattr(self.hf_config, "image_token_index", None)
@staticmethod @staticmethod
def from_server_args(server_args: ServerArgs, model_path: str = None, **kwargs): def from_server_args(
server_args: ServerArgs,
model_path: str = None,
model_revision: str = None,
**kwargs,
):
return ModelConfig( return ModelConfig(
model_path=model_path or server_args.model_path, model_path=model_path or server_args.model_path,
trust_remote_code=server_args.trust_remote_code, trust_remote_code=server_args.trust_remote_code,
revision=server_args.revision, revision=model_revision or server_args.revision,
context_length=server_args.context_length, context_length=server_args.context_length,
model_override_args=server_args.json_model_override_args, model_override_args=server_args.json_model_override_args,
is_embedding=server_args.is_embedding, is_embedding=server_args.is_embedding,
......
...@@ -78,6 +78,11 @@ class TpModelWorker: ...@@ -78,6 +78,11 @@ class TpModelWorker:
if not is_draft_worker if not is_draft_worker
else server_args.speculative_draft_model_path else server_args.speculative_draft_model_path
), ),
model_revision=(
server_args.revision
if not is_draft_worker
else server_args.speculative_draft_model_revision
),
is_draft_model=is_draft_worker, is_draft_model=is_draft_worker,
) )
......
...@@ -249,6 +249,7 @@ class ServerArgs: ...@@ -249,6 +249,7 @@ class ServerArgs:
# Speculative decoding # Speculative decoding
speculative_algorithm: Optional[str] = None speculative_algorithm: Optional[str] = None
speculative_draft_model_path: Optional[str] = None speculative_draft_model_path: Optional[str] = None
speculative_draft_model_revision: Optional[str] = None
speculative_num_steps: Optional[int] = None speculative_num_steps: Optional[int] = None
speculative_eagle_topk: Optional[int] = None speculative_eagle_topk: Optional[int] = None
speculative_num_draft_tokens: Optional[int] = None speculative_num_draft_tokens: Optional[int] = None
...@@ -1498,6 +1499,14 @@ class ServerArgs: ...@@ -1498,6 +1499,14 @@ class ServerArgs:
type=str, type=str,
help="The path of the draft model weights. This can be a local folder or a Hugging Face repo ID.", help="The path of the draft model weights. This can be a local folder or a Hugging Face repo ID.",
) )
parser.add_argument(
"--speculative-draft-model-revision",
type=str,
default=None,
help="The specific draft model version to use. It can be a branch "
"name, a tag name, or a commit id. If unspecified, will use "
"the default version.",
)
parser.add_argument( parser.add_argument(
"--speculative-num-steps", "--speculative-num-steps",
type=int, type=int,
......
...@@ -505,6 +505,7 @@ class SRTRunner: ...@@ -505,6 +505,7 @@ class SRTRunner:
mem_fraction_static: float = 0.65, mem_fraction_static: float = 0.65,
trust_remote_code: bool = False, trust_remote_code: bool = False,
speculative_draft_model_path: Optional[str] = None, speculative_draft_model_path: Optional[str] = None,
speculative_draft_model_revision: Optional[str] = None,
speculative_algorithm: Optional[str] = None, speculative_algorithm: Optional[str] = None,
speculative_num_steps: Optional[int] = None, speculative_num_steps: Optional[int] = None,
speculative_eagle_topk: Optional[int] = None, speculative_eagle_topk: Optional[int] = None,
...@@ -526,6 +527,9 @@ class SRTRunner: ...@@ -526,6 +527,9 @@ class SRTRunner:
spec_kwargs = {} spec_kwargs = {}
if speculative_draft_model_path: if speculative_draft_model_path:
spec_kwargs["speculative_draft_model_path"] = speculative_draft_model_path spec_kwargs["speculative_draft_model_path"] = speculative_draft_model_path
spec_kwargs["speculative_draft_model_revision"] = (
speculative_draft_model_revision
)
spec_kwargs["speculative_algorithm"] = speculative_algorithm spec_kwargs["speculative_algorithm"] = speculative_algorithm
spec_kwargs["speculative_num_steps"] = speculative_num_steps spec_kwargs["speculative_num_steps"] = speculative_num_steps
spec_kwargs["speculative_eagle_topk"] = speculative_eagle_topk spec_kwargs["speculative_eagle_topk"] = speculative_eagle_topk
......
...@@ -268,7 +268,7 @@ class TestMTP(CustomTestCase): ...@@ -268,7 +268,7 @@ class TestMTP(CustomTestCase):
"deepep", "deepep",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN, DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -343,7 +343,7 @@ class TestMTPWithTBO(CustomTestCase): ...@@ -343,7 +343,7 @@ class TestMTPWithTBO(CustomTestCase):
"3", "3",
"--speculative-num-draft-tokens", "--speculative-num-draft-tokens",
"3", "3",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN, DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--chunked-prefill-size", "--chunked-prefill-size",
"256", "256",
......
...@@ -1225,7 +1225,7 @@ class Test30(CustomTestCase): ...@@ -1225,7 +1225,7 @@ class Test30(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1272,7 +1272,7 @@ class Test31(CustomTestCase): ...@@ -1272,7 +1272,7 @@ class Test31(CustomTestCase):
"4", "4",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1319,7 +1319,7 @@ class Test32(CustomTestCase): ...@@ -1319,7 +1319,7 @@ class Test32(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1365,7 +1365,7 @@ class Test33(CustomTestCase): ...@@ -1365,7 +1365,7 @@ class Test33(CustomTestCase):
"1", "1",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1414,7 +1414,7 @@ class Test34(CustomTestCase): ...@@ -1414,7 +1414,7 @@ class Test34(CustomTestCase):
"1", "1",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1463,7 +1463,7 @@ class Test35(CustomTestCase): ...@@ -1463,7 +1463,7 @@ class Test35(CustomTestCase):
"1", "1",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1511,7 +1511,7 @@ class Test36(CustomTestCase): ...@@ -1511,7 +1511,7 @@ class Test36(CustomTestCase):
"--enable-dp-lm-head", "--enable-dp-lm-head",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1559,7 +1559,7 @@ class Test37(CustomTestCase): ...@@ -1559,7 +1559,7 @@ class Test37(CustomTestCase):
"--enable-dp-lm-head", "--enable-dp-lm-head",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1609,7 +1609,7 @@ class Test38(CustomTestCase): ...@@ -1609,7 +1609,7 @@ class Test38(CustomTestCase):
"--enable-dp-lm-head", "--enable-dp-lm-head",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1659,7 +1659,7 @@ class Test39(CustomTestCase): ...@@ -1659,7 +1659,7 @@ class Test39(CustomTestCase):
"--enable-dp-lm-head", "--enable-dp-lm-head",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1709,7 +1709,7 @@ class Test40(CustomTestCase): ...@@ -1709,7 +1709,7 @@ class Test40(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1762,7 +1762,7 @@ class Test41(CustomTestCase): ...@@ -1762,7 +1762,7 @@ class Test41(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1815,7 +1815,7 @@ class Test42(CustomTestCase): ...@@ -1815,7 +1815,7 @@ class Test42(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1867,7 +1867,7 @@ class Test43(CustomTestCase): ...@@ -1867,7 +1867,7 @@ class Test43(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1922,7 +1922,7 @@ class Test44(CustomTestCase): ...@@ -1922,7 +1922,7 @@ class Test44(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -1977,7 +1977,7 @@ class Test45(CustomTestCase): ...@@ -1977,7 +1977,7 @@ class Test45(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2031,7 +2031,7 @@ class Test46(CustomTestCase): ...@@ -2031,7 +2031,7 @@ class Test46(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2085,7 +2085,7 @@ class Test47(CustomTestCase): ...@@ -2085,7 +2085,7 @@ class Test47(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2141,7 +2141,7 @@ class Test48(CustomTestCase): ...@@ -2141,7 +2141,7 @@ class Test48(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2197,7 +2197,7 @@ class Test49(CustomTestCase): ...@@ -2197,7 +2197,7 @@ class Test49(CustomTestCase):
"32", "32",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2243,7 +2243,7 @@ class Test50(CustomTestCase): ...@@ -2243,7 +2243,7 @@ class Test50(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2292,7 +2292,7 @@ class Test51(CustomTestCase): ...@@ -2292,7 +2292,7 @@ class Test51(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2341,7 +2341,7 @@ class Test52(CustomTestCase): ...@@ -2341,7 +2341,7 @@ class Test52(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2389,7 +2389,7 @@ class Test53(CustomTestCase): ...@@ -2389,7 +2389,7 @@ class Test53(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2440,7 +2440,7 @@ class Test54(CustomTestCase): ...@@ -2440,7 +2440,7 @@ class Test54(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2491,7 +2491,7 @@ class Test55(CustomTestCase): ...@@ -2491,7 +2491,7 @@ class Test55(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2541,7 +2541,7 @@ class Test56(CustomTestCase): ...@@ -2541,7 +2541,7 @@ class Test56(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2591,7 +2591,7 @@ class Test57(CustomTestCase): ...@@ -2591,7 +2591,7 @@ class Test57(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2643,7 +2643,7 @@ class Test58(CustomTestCase): ...@@ -2643,7 +2643,7 @@ class Test58(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
...@@ -2695,7 +2695,7 @@ class Test59(CustomTestCase): ...@@ -2695,7 +2695,7 @@ class Test59(CustomTestCase):
"8", "8",
"--speculative-algo", "--speculative-algo",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/DeepSeek-V3-0324-NextN", "lmsys/DeepSeek-V3-0324-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
......
...@@ -74,7 +74,7 @@ class TestDPAttentionDP2TP2DeepseekV3MTP(CustomTestCase): ...@@ -74,7 +74,7 @@ class TestDPAttentionDP2TP2DeepseekV3MTP(CustomTestCase):
"4", "4",
"--speculative-num-draft-tokens", "--speculative-num-draft-tokens",
"4", "4",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN, DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--tp-size", "--tp-size",
"2", "2",
......
...@@ -146,7 +146,7 @@ class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest): ...@@ -146,7 +146,7 @@ class TestFlashAttention3SpeculativeDecode(BaseFlashAttentionTest):
"4", "4",
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE3", "EAGLE3",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3, DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3,
"--speculative-num-steps", "--speculative-num-steps",
"3", "3",
...@@ -180,7 +180,7 @@ class TestFlashAttention3SpeculativeDecodeTopk(BaseFlashAttentionTest): ...@@ -180,7 +180,7 @@ class TestFlashAttention3SpeculativeDecodeTopk(BaseFlashAttentionTest):
"4", "4",
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE3", "EAGLE3",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3, DEFAULT_MODEL_NAME_FOR_TEST_EAGLE3,
"--speculative-num-steps", "--speculative-num-steps",
"5", "5",
...@@ -212,7 +212,7 @@ class TestFlashAttention3MLASpeculativeDecode(BaseFlashAttentionTest): ...@@ -212,7 +212,7 @@ class TestFlashAttention3MLASpeculativeDecode(BaseFlashAttentionTest):
"4", "4",
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN, DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--speculative-num-steps", "--speculative-num-steps",
"3", "3",
...@@ -244,7 +244,7 @@ class TestFlashAttention3MLASpeculativeDecodeTopk(BaseFlashAttentionTest): ...@@ -244,7 +244,7 @@ class TestFlashAttention3MLASpeculativeDecodeTopk(BaseFlashAttentionTest):
"4", "4",
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN, DEFAULT_MODEL_NAME_FOR_TEST_MLA_NEXTN,
"--speculative-num-steps", "--speculative-num-steps",
"5", "5",
......
...@@ -100,7 +100,7 @@ class TestFlashMLAMTP(CustomTestCase): ...@@ -100,7 +100,7 @@ class TestFlashMLAMTP(CustomTestCase):
"1", "1",
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"lmsys/sglang-ci-dsv3-test-NextN", "lmsys/sglang-ci-dsv3-test-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"1", "1",
......
...@@ -121,7 +121,7 @@ class TestHybridAttnBackendSpeculativeDecoding(TestHybridAttnBackendBase): ...@@ -121,7 +121,7 @@ class TestHybridAttnBackendSpeculativeDecoding(TestHybridAttnBackendBase):
return DEFAULT_SERVER_ARGS + [ return DEFAULT_SERVER_ARGS + [
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST, DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST,
"--speculative-num-steps", "--speculative-num-steps",
"3", "3",
......
...@@ -67,7 +67,7 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase): ...@@ -67,7 +67,7 @@ class TestDeepseekV3MTPChannelInt8(CustomTestCase):
"1", "1",
"--speculative-algorithm", "--speculative-algorithm",
"EAGLE", "EAGLE",
"--speculative-draft", "--speculative-draft-model-path",
"sgl-project/sglang-ci-dsv3-channel-int8-test-NextN", "sgl-project/sglang-ci-dsv3-channel-int8-test-NextN",
"--speculative-num-steps", "--speculative-num-steps",
"2", "2",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment