Unverified commit 19f76ee6, authored by yyzxw, committed by GitHub
Browse files

[misc] refactor speculative config (#25657)


Signed-off-by: zxw <1020938856@qq.com>
parent dd70437a
...@@ -209,12 +209,9 @@ class SpeculativeConfig: ...@@ -209,12 +209,9 @@ class SpeculativeConfig:
if self.model is None and self.num_speculative_tokens is not None: if self.model is None and self.num_speculative_tokens is not None:
# TODO(Shangming): Refactor mtp configuration logic when supporting # TODO(Shangming): Refactor mtp configuration logic when supporting
# mtp acceleration for more models besides deepseek_v3 if (self.target_model_config
if self.target_model_config and \ and self.target_model_config.hf_text_config.model_type
(self.target_model_config.hf_text_config.model_type \ in ("deepseek_v3", "mimo", "ernie4_5_moe", "qwen3_next")):
== "deepseek_v3" or
self.target_model_config.hf_text_config.model_type in
("mimo","ernie4_5_moe", "qwen3_next")):
# use the draft model from the same model: # use the draft model from the same model:
self.model = self.target_model_config.model self.model = self.target_model_config.model
# Align the quantization of draft model for cases such as # Align the quantization of draft model for cases such as
...@@ -224,8 +221,9 @@ class SpeculativeConfig: ...@@ -224,8 +221,9 @@ class SpeculativeConfig:
elif self.method in ("ngram", "[ngram]"): elif self.method in ("ngram", "[ngram]"):
self.model = "ngram" self.model = "ngram"
else: else:
raise ValueError("num_speculative_tokens was provided without " raise ValueError(
"speculative model.") "num_speculative_tokens was provided but without "
"speculative model.")
# Automatically configure the method for ngram when "model" is used # Automatically configure the method for ngram when "model" is used
# instead of "method" # instead of "method"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment