Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2110557d
Unverified
Commit 2110557d authored Jun 26, 2024 by Nick Hill
Committed by GitHub Jun 27, 2024
Browse files
[BugFix] Fix cuda graph for MLPSpeculator (#5875)
Co-authored-by: Abhinav Goyal <abhinav.goyal@flipkart.com>
parent
b9e84259
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 6 additions and 4 deletions
+6
-4
examples/offline_inference_mlpspeculator.py
examples/offline_inference_mlpspeculator.py
+0
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+6
-3
No files found.
examples/offline_inference_mlpspeculator.py
View file @
2110557d
...
...
@@ -52,7 +52,6 @@ if __name__ == "__main__":
        speculative_model="ibm-fms/llama-13b-accelerator",
        # These are currently required for MLPSpeculator decoding
        use_v2_block_manager=True,
        enforce_eager=True,
    )
    print("With speculation")
...
...
vllm/worker/model_runner.py
View file @
2110557d
...
...
@@ -1020,10 +1020,13 @@ class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]):
        if self.return_hidden_states:
            # we only need to pass hidden states of most recent token
            if model_input.is_prompt:
                assert model_input.sampling_metadata is not None
                hidden_states = hidden_states.index_select(
                    0, model_input.sampling_metadata.selected_token_indices)
            indices = model_input.sampling_metadata.selected_token_indices
            if model_input.is_prompt:
                hidden_states = hidden_states.index_select(0, indices)
            elif decode_meta.use_cuda_graph:
                hidden_states = hidden_states[:len(indices)]
            output.hidden_states = hidden_states
        return output
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment