Unverified commit bb3605db, authored by qizixi, committed by GitHub
Browse files

[Bugfix] Fix v1/spec_decode/test_ngram.py (#16895)


Signed-off-by: qizixi <qizixi@meta.com>
parent fe742aef
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
import numpy as np import numpy as np
from vllm.config import SpeculativeConfig, VllmConfig
from vllm.v1.spec_decode.ngram_proposer import (NgramProposer, from vllm.v1.spec_decode.ngram_proposer import (NgramProposer,
_find_subarray_kmp, _find_subarray_kmp,
_kmp_lps_array) _kmp_lps_array)
...@@ -39,50 +40,40 @@ def test_find_subarray_kmp(): ...@@ -39,50 +40,40 @@ def test_find_subarray_kmp():
def test_ngram_proposer(): def test_ngram_proposer():
proposer = NgramProposer()
def ngram_proposer(min_n: int, max_n: int, k: int) -> NgramProposer:
return NgramProposer(vllm_config=VllmConfig(
speculative_config=SpeculativeConfig.from_dict(
{
"prompt_lookup_min": min_n,
"prompt_lookup_max": max_n,
"num_speculative_tokens": k,
"method": "ngram",
})))
# No match. # No match.
result = proposer.propose( result = ngram_proposer(
context_token_ids=np.array([1, 2, 3, 4, 5]), 2, 2, 2).propose(context_token_ids=np.array([1, 2, 3, 4, 5]))
min_n=2,
max_n=2,
k=2,
)
assert result is None assert result is None
# No match for 4-gram. # No match for 4-gram.
result = proposer.propose( result = ngram_proposer(
context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3]), 4, 4, 2).propose(context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3]))
min_n=4,
max_n=4,
k=2,
)
assert result is None assert result is None
# No match for 4-gram but match for 3-gram. # No match for 4-gram but match for 3-gram.
result = proposer.propose( result = ngram_proposer(
context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3]), 3, 4, 2).propose(context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3]))
min_n=3,
max_n=4,
k=2,
)
assert np.array_equal(result, np.array([4, 1])) assert np.array_equal(result, np.array([4, 1]))
# Match for both 4-gram and 3-gram. # Match for both 4-gram and 3-gram.
# In this case, the proposer should return the 4-gram match. # In this case, the proposer should return the 4-gram match.
result = proposer.propose( result = ngram_proposer(3, 4, 2).propose(
context_token_ids=np.array([2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4]), context_token_ids=np.array([2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4]))
min_n=3,
max_n=4,
k=2,
)
assert np.array_equal(result, np.array([1, 2])) # Not [5, 1] assert np.array_equal(result, np.array([1, 2])) # Not [5, 1]
# Match for 2-gram and 3-gram, but not 4-gram. # Match for 2-gram and 3-gram, but not 4-gram.
result = proposer.propose( result = ngram_proposer(
context_token_ids=np.array([3, 4, 5, 2, 3, 4, 1, 2, 3, 4]), 2, 4,
min_n=2, 2).propose(context_token_ids=np.array([3, 4, 5, 2, 3, 4, 1, 2, 3, 4]))
max_n=4,
k=2,
)
assert np.array_equal(result, np.array([1, 2])) # Not [5, 2] assert np.array_equal(result, np.array([1, 2])) # Not [5, 2]
...@@ -2306,7 +2306,8 @@ class SpeculativeConfig: ...@@ -2306,7 +2306,8 @@ class SpeculativeConfig:
if self.model is None and self.num_speculative_tokens is not None: if self.model is None and self.num_speculative_tokens is not None:
# TODO(Shangming): Refactor mtp configuration logic when supporting # TODO(Shangming): Refactor mtp configuration logic when supporting
# mtp acceleration for more models besides deepseek_v3 # mtp acceleration for more models besides deepseek_v3
if self.target_model_config.hf_text_config.model_type \ if self.target_model_config and \
self.target_model_config.hf_text_config.model_type \
== "deepseek_v3": == "deepseek_v3":
# use the draft model from the same model: # use the draft model from the same model:
self.model = self.target_model_config.model self.model = self.target_model_config.model
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment