Unverified commit aa08a30f, authored by Vincent Gimenes, committed by GitHub
Browse files

[CLEANING] Remove unused disable_by_batch_size from SpeculativeConfig (#35060)


Signed-off-by: Vincent Gimenes <vincent.gimenes@gmail.com>
parent 7f40e9e5
...@@ -101,9 +101,6 @@ class SpeculativeConfig: ...@@ -101,9 +101,6 @@ class SpeculativeConfig:
will use the default version.""" will use the default version."""
# Advanced control # Advanced control
disable_by_batch_size: int | None = Field(default=None, ge=2)
"""Disable speculative decoding for new incoming requests when the number
of enqueued requests is larger than this value, if provided."""
disable_padded_drafter_batch: bool = False disable_padded_drafter_batch: bool = False
"""Disable input padding for speculative decoding. If set to True, """Disable input padding for speculative decoding. If set to True,
speculative input batches can contain sequences of different lengths, speculative input batches can contain sequences of different lengths,
...@@ -707,13 +704,6 @@ class SpeculativeConfig: ...@@ -707,13 +704,6 @@ class SpeculativeConfig:
self.draft_parallel_config self.draft_parallel_config
) )
if self.disable_by_batch_size is not None and self.disable_by_batch_size < 2:
raise ValueError(
"Expect the batch size threshold of disabling "
"speculative decoding is > 1, but got "
f"{self.disable_by_batch_size=}"
)
eagle3_target_supported = [ eagle3_target_supported = [
"llama", "llama",
"qwen", "qwen",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment