Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
966e3169
Unverified
Commit
966e3169
authored
Nov 05, 2024
by
Wallas Henrique
Committed by
GitHub
Nov 06, 2024
Browse files
[Bugfix] Fix pickle of input when async output processing is on (#9931)
Signed-off-by:
Wallas Santos
<
wallashss@ibm.com
>
parent
43300bd9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
0 deletions
+38
-0
tests/basic_correctness/test_basic_correctness.py
tests/basic_correctness/test_basic_correctness.py
+26
-0
vllm/worker/model_runner.py
vllm/worker/model_runner.py
+12
-0
No files found.
tests/basic_correctness/test_basic_correctness.py
View file @
966e3169
...
...
@@ -156,3 +156,29 @@ def test_model_with_failure(vllm_runner) -> None:
ModelInputForGPUWithSamplingMetadata
)
finally
:
os
.
remove
(
filename
)
def test_failure_with_async_out_proc(vllm_runner) -> None:
    """Check that a failing forward pass reports a dumped-input pickle file.

    The OPT model's ``forward`` is patched to raise ``ValueError`` while the
    engine is configured with ``enforce_eager=False``; the test first asserts
    that async output processing is actually enabled for that configuration.
    The ``ValueError`` surfaced by ``generate_greedy`` must mention
    ``input dumped to <path>.pkl``; that file is removed afterwards so the
    test leaves nothing behind.
    """
    filename = None
    try:
        with vllm_runner("facebook/opt-125m",
                         dtype="half",
                         enforce_eager=False,
                         gpu_memory_utilization=0.7) as vllm_model,\
             patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",
                   side_effect=ValueError()):
            model_config = vllm_model.model.llm_engine.model_config
            # The scenario under test only makes sense with async output
            # processing switched on.
            assert model_config.use_async_output_proc
            with pytest.raises(ValueError) as exc_info:
                vllm_model.generate_greedy('how to make pizza?', 250)
            # Capture the complete dump path, including the literal ".pkl"
            # suffix (escaped so the dot is not a wildcard).
            matches = re.search(r"input dumped to (.+\.pkl)",
                                str(exc_info.value))
            assert matches is not None
            filename = matches.group(1)
    finally:
        # Clean up the dump file, if the error message told us where it is.
        if filename is not None:
            os.remove(filename)
vllm/worker/model_runner.py
View file @
966e3169
...
...
@@ -136,6 +136,18 @@ class ModelInputForGPU(ModelRunnerInputBase):
attn_backend
,
tensor_dict
)
return
cls
(
**
tensor_dict
)
# Exclude `async_callback` to be able to pickle this object
def __getstate__(self):
    """Return the picklable state of this object.

    The state is a shallow copy of the instance ``__dict__`` with the
    ``async_callback`` entry removed, so that entry is never handed to
    pickle. The instance itself is not modified.

    Returns:
        dict: attribute name -> value, without ``"async_callback"``.
    """
    state = self.__dict__.copy()
    # Use pop() with a default instead of `del` so that an instance which
    # does not carry the attribute can still be pickled instead of raising
    # KeyError.
    state.pop("async_callback", None)
    return state
# TODO: What happens when we unpickle this object?
# How can we update this callback to properly pass it to the engine?
def __setstate__(self, state):
    """Restore attributes from ``state`` and reset ``async_callback``.

    Every entry of ``state`` is written into the instance ``__dict__``;
    ``async_callback`` is then forced to ``None`` regardless of whether
    ``state`` contained a value for it.

    Args:
        state: mapping of attribute names to values, as produced by
            ``__getstate__``.
    """
    # Write through the instance dict rather than assigning attributes
    # (presumably because the dataclass is frozen -- TODO confirm). The
    # keyword argument is applied after ``state``, so it always wins.
    vars(self).update(state, async_callback=None)
@
dataclass
(
frozen
=
True
)
class
ModelInputForGPUWithSamplingMetadata
(
ModelInputForGPU
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment