Unverified commit dd572c0a, authored by Woosuk Kwon and committed by GitHub
Browse files

[V0 Deprecation] Remove V0 Spec Decode workers (#21152)


Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent 9ffe905a
This diff is collapsed.
...@@ -1417,28 +1417,12 @@ class EngineArgs: ...@@ -1417,28 +1417,12 @@ class EngineArgs:
return False return False
# V1 supports N-gram, Medusa, and Eagle speculative decoding. # V1 supports N-gram, Medusa, and Eagle speculative decoding.
is_ngram_enabled = False if (self.speculative_config is not None
is_eagle_enabled = False and self.speculative_config.get("method") == "draft_model"):
is_medusa_enabled = False raise NotImplementedError(
if self.speculative_config is not None: "Speculative decoding with draft model is not supported yet. "
# This is supported but experimental (handled below). "Please consider using other speculative decoding methods "
speculative_method = self.speculative_config.get("method") "such as ngram, medusa, eagle, or deepseek_mtp.")
if speculative_method:
if speculative_method in ("ngram", "[ngram]"):
is_ngram_enabled = True
elif speculative_method == "medusa":
is_medusa_enabled = True
elif speculative_method in ("eagle", "eagle3", "deepseek_mtp"):
is_eagle_enabled = True
else:
speculative_model = self.speculative_config.get("model")
if speculative_model in ("ngram", "[ngram]"):
is_ngram_enabled = True
if not (is_ngram_enabled or is_eagle_enabled or is_medusa_enabled):
# Other speculative decoding methods are not supported yet.
_raise_or_fallback(feature_name="Speculative Decoding",
recommend_to_remove=False)
return False
# No XFormers so far. # No XFormers so far.
V1_BACKENDS = [ V1_BACKENDS = [
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -104,11 +104,6 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor): ...@@ -104,11 +104,6 @@ class MultiStepOutputProcessor(SequenceGroupOutputProcessor):
seqs = sequence_group.get_seqs( seqs = sequence_group.get_seqs(
status=SequenceStatus.FINISHED_ABORTED) status=SequenceStatus.FINISHED_ABORTED)
for output in outputs:
if output.samples[0].output_token != VLLM_INVALID_TOKEN_ID:
sequence_group.metrics.spec_token_acceptance_counts[
output.step_index] += 1
assert seqs, "Expected RUNNING or FINISHED_ABORTED sequences" assert seqs, "Expected RUNNING or FINISHED_ABORTED sequences"
assert len(seqs) == 1, ( assert len(seqs) == 1, (
"Beam search not supported in multi-step decoding.") "Beam search not supported in multi-step decoding.")
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment