Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bb3605db
Unverified
Commit
bb3605db
authored
Apr 20, 2025
by
qizixi
Committed by
GitHub
Apr 20, 2025
Browse files
[Bugfix] Fix v1/spec_decode/test_ngram.py (#16895)
Signed-off-by:
qizixi
<
qizixi@meta.com
>
parent
fe742aef
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
30 additions
and
38 deletions
+30
-38
tests/v1/spec_decode/test_ngram.py
tests/v1/spec_decode/test_ngram.py
+22
-31
vllm/config.py
vllm/config.py
+8
-7
No files found.
tests/v1/spec_decode/test_ngram.py
View file @
bb3605db
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
import
numpy
as
np
import
numpy
as
np
from
vllm.config
import
SpeculativeConfig
,
VllmConfig
from
vllm.v1.spec_decode.ngram_proposer
import
(
NgramProposer
,
from
vllm.v1.spec_decode.ngram_proposer
import
(
NgramProposer
,
_find_subarray_kmp
,
_find_subarray_kmp
,
_kmp_lps_array
)
_kmp_lps_array
)
...
@@ -39,50 +40,40 @@ def test_find_subarray_kmp():
...
@@ -39,50 +40,40 @@ def test_find_subarray_kmp():
def
test_ngram_proposer
():
def
test_ngram_proposer
():
proposer
=
NgramProposer
()
def
ngram_proposer
(
min_n
:
int
,
max_n
:
int
,
k
:
int
)
->
NgramProposer
:
return
NgramProposer
(
vllm_config
=
VllmConfig
(
speculative_config
=
SpeculativeConfig
.
from_dict
(
{
"prompt_lookup_min"
:
min_n
,
"prompt_lookup_max"
:
max_n
,
"num_speculative_tokens"
:
k
,
"method"
:
"ngram"
,
})))
# No match.
# No match.
result
=
proposer
.
propose
(
result
=
ngram_proposer
(
context_token_ids
=
np
.
array
([
1
,
2
,
3
,
4
,
5
]),
2
,
2
,
2
).
propose
(
context_token_ids
=
np
.
array
([
1
,
2
,
3
,
4
,
5
]))
min_n
=
2
,
max_n
=
2
,
k
=
2
,
)
assert
result
is
None
assert
result
is
None
# No match for 4-gram.
# No match for 4-gram.
result
=
proposer
.
propose
(
result
=
ngram_proposer
(
context_token_ids
=
np
.
array
([
1
,
2
,
3
,
4
,
1
,
2
,
3
]),
4
,
4
,
2
).
propose
(
context_token_ids
=
np
.
array
([
1
,
2
,
3
,
4
,
1
,
2
,
3
]))
min_n
=
4
,
max_n
=
4
,
k
=
2
,
)
assert
result
is
None
assert
result
is
None
# No match for 4-gram but match for 3-gram.
# No match for 4-gram but match for 3-gram.
result
=
proposer
.
propose
(
result
=
ngram_proposer
(
context_token_ids
=
np
.
array
([
1
,
2
,
3
,
4
,
1
,
2
,
3
]),
3
,
4
,
2
).
propose
(
context_token_ids
=
np
.
array
([
1
,
2
,
3
,
4
,
1
,
2
,
3
]))
min_n
=
3
,
max_n
=
4
,
k
=
2
,
)
assert
np
.
array_equal
(
result
,
np
.
array
([
4
,
1
]))
assert
np
.
array_equal
(
result
,
np
.
array
([
4
,
1
]))
# Match for both 4-gram and 3-gram.
# Match for both 4-gram and 3-gram.
# In this case, the proposer should return the 4-gram match.
# In this case, the proposer should return the 4-gram match.
result
=
proposer
.
propose
(
result
=
ngram_proposer
(
3
,
4
,
2
).
propose
(
context_token_ids
=
np
.
array
([
2
,
3
,
4
,
5
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
]),
context_token_ids
=
np
.
array
([
2
,
3
,
4
,
5
,
1
,
2
,
3
,
4
,
1
,
2
,
3
,
4
]))
min_n
=
3
,
max_n
=
4
,
k
=
2
,
)
assert
np
.
array_equal
(
result
,
np
.
array
([
1
,
2
]))
# Not [5, 1]
assert
np
.
array_equal
(
result
,
np
.
array
([
1
,
2
]))
# Not [5, 1]
# Match for 2-gram and 3-gram, but not 4-gram.
# Match for 2-gram and 3-gram, but not 4-gram.
result
=
proposer
.
propose
(
result
=
ngram_proposer
(
context_token_ids
=
np
.
array
([
3
,
4
,
5
,
2
,
3
,
4
,
1
,
2
,
3
,
4
]),
2
,
4
,
min_n
=
2
,
2
).
propose
(
context_token_ids
=
np
.
array
([
3
,
4
,
5
,
2
,
3
,
4
,
1
,
2
,
3
,
4
]))
max_n
=
4
,
k
=
2
,
)
assert
np
.
array_equal
(
result
,
np
.
array
([
1
,
2
]))
# Not [5, 2]
assert
np
.
array_equal
(
result
,
np
.
array
([
1
,
2
]))
# Not [5, 2]
vllm/config.py
View file @
bb3605db
...
@@ -2306,7 +2306,8 @@ class SpeculativeConfig:
...
@@ -2306,7 +2306,8 @@ class SpeculativeConfig:
if
self
.
model
is
None
and
self
.
num_speculative_tokens
is
not
None
:
if
self
.
model
is
None
and
self
.
num_speculative_tokens
is
not
None
:
# TODO(Shangming): Refactor mtp configuration logic when supporting
# TODO(Shangming): Refactor mtp configuration logic when supporting
# mtp acceleration for more models besides deepseek_v3
# mtp acceleration for more models besides deepseek_v3
if
self
.
target_model_config
.
hf_text_config
.
model_type
\
if
self
.
target_model_config
and
\
self
.
target_model_config
.
hf_text_config
.
model_type
\
==
"deepseek_v3"
:
==
"deepseek_v3"
:
# use the draft model from the same model:
# use the draft model from the same model:
self
.
model
=
self
.
target_model_config
.
model
self
.
model
=
self
.
target_model_config
.
model
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment