Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7da24875
Unverified
Commit
7da24875
authored
Sep 30, 2024
by
youkaichao
Committed by
GitHub
Oct 01, 2024
Browse files
[torch.compile] fix tensor alias (#8982)
parent
aaccca2b
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
3 deletions
+9
-3
vllm/worker/embedding_model_runner.py
vllm/worker/embedding_model_runner.py
+2
-1
vllm/worker/enc_dec_model_runner.py
vllm/worker/enc_dec_model_runner.py
+2
-1
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+5
-1
No files found.
vllm/worker/embedding_model_runner.py
View file @
7da24875
...
...
@@ -103,7 +103,8 @@ class EmbeddingModelRunner(
# a placeholder (it has wide hardware support).
kv_caches
=
[
torch
.
tensor
([],
dtype
=
torch
.
float32
,
device
=
self
.
device
)
]
*
num_layers
for
_
in
range
(
num_layers
)
]
execute_model_kwargs
=
{
"input_ids"
:
...
...
vllm/worker/enc_dec_model_runner.py
View file @
7da24875
...
...
@@ -348,7 +348,8 @@ class EncoderDecoderModelRunner(GPUModelRunnerBase[EncoderDecoderModelInput]):
# a placeholder (it has wide hardware support).
kv_caches
=
[
torch
.
tensor
([],
dtype
=
torch
.
float32
,
device
=
self
.
device
)
]
*
num_layers
for
_
in
range
(
num_layers
)
]
finished_requests_ids
=
[
seq
.
request_id
for
seq
in
seqs
]
model_input
=
self
.
prepare_model_input
(
seqs
,
finished_requests_ids
=
finished_requests_ids
)
...
...
vllm/worker/model_runner.py
View file @
7da24875
...
...
@@ -1244,9 +1244,13 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
# it by reference, rather than by specializing on the value ``None``.
# the `dtype` argument does not matter, and we use `float32` as
# a placeholder (it has wide hardware support).
# it is important to create the tensors inside the loop, rather than
# multiplying the list, to prevent Dynamo from treating them as
# aliased tensors (list multiplication repeats the same tensor object).
kv_caches
=
[
torch
.
tensor
([],
dtype
=
torch
.
float32
,
device
=
self
.
device
)
]
*
num_layers
for
_
in
range
(
num_layers
)
]
finished_requests_ids
=
[
seq
.
request_id
for
seq
in
seqs
]
model_input
=
self
.
prepare_model_input
(
seqs
,
finished_requests_ids
=
finished_requests_ids
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment