Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2e7796f2
Unverified
Commit
2e7796f2
authored
May 11, 2024
by
heeju-kim2
Committed by
GitHub
May 10, 2024
Browse files
[Speculative decoding] CUDA graph support (#4295)
Co-authored-by:
Cade Daniel
<
edacih@gmail.com
>
parent
706588a7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
37 additions
and
0 deletions
+37
-0
tests/spec_decode/e2e/test_multistep_correctness.py
tests/spec_decode/e2e/test_multistep_correctness.py
+37
-0
No files found.
tests/spec_decode/e2e/test_multistep_correctness.py
View file @
2e7796f2
...
@@ -611,3 +611,40 @@ def test_many_k(baseline_llm_generator, test_llm_generator, batch_size: int,
...
@@ -611,3 +611,40 @@ def test_many_k(baseline_llm_generator, test_llm_generator, batch_size: int,
batch_size
,
batch_size
,
max_output_len
=
output_len
,
max_output_len
=
output_len
,
force_output_len
=
True
)
force_output_len
=
True
)
@pytest.mark.parametrize(
    "common_llm_kwargs",
    [
        {
            # Speculative decoding needs the v2 block manager.
            "use_v2_block_manager": True,
            # Keep eager mode off so that CUDA graphs are allowed.
            "enforce_eager": False,
            "model": "JackFram/llama-68m",
        },
    ],
)
@pytest.mark.parametrize(
    "per_test_common_llm_kwargs",
    [
        {
            # The draft model is the same checkpoint as the target model.
            "speculative_model": "JackFram/llama-68m",
            "num_speculative_tokens": 5,
        },
    ],
)
@pytest.mark.parametrize("baseline_llm_kwargs", [{}])
@pytest.mark.parametrize("test_llm_kwargs", [{}])
@pytest.mark.parametrize("batch_size", [8])
@pytest.mark.parametrize("output_len", [32])
@pytest.mark.parametrize("seed", [1])
def test_spec_decode_cuda_graph(
    baseline_llm_generator,
    test_llm_generator,
    batch_size: int,
    output_len: int,
):
    """Check greedy-output equality for spec decode with CUDA graphs allowed.

    The engine is configured with ``enforce_eager=False`` and a draft model
    identical to the target model, proposing 5 speculative tokens per step.
    Both generators are handed to ``run_greedy_equality_correctness_test``
    with the output length forced to ``output_len``.

    NOTE(review): ``seed`` is parametrized but is not a parameter of this
    function; presumably the generator fixtures consume it -- confirm
    against the fixture definitions.
    """
    run_greedy_equality_correctness_test(
        baseline_llm_generator,
        test_llm_generator,
        batch_size,
        max_output_len=output_len,
        force_output_len=True,
    )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment