Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ccdae737
Unverified
Commit
ccdae737
authored
Aug 08, 2025
by
Nick Hill
Committed by
GitHub
Aug 08, 2025
Browse files
[BugFix] Don't cancel asyncio tasks directly from destructors (#22476)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
90406390
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
14 deletions
+23
-14
vllm/utils/__init__.py
vllm/utils/__init__.py
+17
-6
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+2
-3
vllm/v1/engine/core_client.py
vllm/v1/engine/core_client.py
+4
-5
No files found.
vllm/utils/__init__.py
View file @
ccdae737
...
@@ -687,19 +687,30 @@ class AsyncMicrobatchTokenizer:
...
@@ -687,19 +687,30 @@ class AsyncMicrobatchTokenizer:
max_length
=
kwargs
.
get
(
"max_length"
)
max_length
=
kwargs
.
get
(
"max_length"
)
if
not
truncation
:
if
not
truncation
:
return
(
"encode"
,
add_special_tokens
,
False
,
None
)
return
"encode"
,
add_special_tokens
,
False
,
None
model_max
=
getattr
(
self
.
tokenizer
,
"model_max_length"
,
None
)
model_max
=
getattr
(
self
.
tokenizer
,
"model_max_length"
,
None
)
if
max_length
is
None
or
(
model_max
is
not
None
if
max_length
is
None
or
(
model_max
is
not
None
and
max_length
==
model_max
):
and
max_length
==
model_max
):
return
(
"encode"
,
add_special_tokens
,
True
,
"model_max"
)
return
"encode"
,
add_special_tokens
,
True
,
"model_max"
return
(
"encode"
,
"other"
)
return
"encode"
,
"other"
def
__del__
(
self
):
def
__del__
(
self
):
for
task
in
self
.
_batcher_tasks
:
if
((
tasks
:
=
getattr
(
self
,
"_batcher_tasks"
,
None
))
if
not
task
.
done
():
and
(
loop
:
=
getattr
(
self
,
"_loop"
,
None
))
task
.
cancel
()
and
not
loop
.
is_closed
()):
def
cancel_tasks
():
for
task
in
tasks
:
task
.
cancel
()
loop
.
call_soon_threadsafe
(
cancel_tasks
)
def
cancel_task_threadsafe
(
task
:
Task
):
if
task
and
not
task
.
done
()
and
not
(
loop
:
=
task
.
get_loop
()).
is_closed
():
loop
.
call_soon_threadsafe
(
task
.
cancel
)
def
make_async
(
def
make_async
(
...
...
vllm/v1/engine/async_llm.py
View file @
ccdae737
...
@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
...
@@ -27,7 +27,7 @@ from vllm.transformers_utils.config import (
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
from
vllm.transformers_utils.tokenizer_group
import
init_tokenizer_from_configs
from
vllm.transformers_utils.tokenizer_group
import
init_tokenizer_from_configs
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.usage.usage_lib
import
UsageContext
from
vllm.utils
import
Device
,
cdiv
,
deprecate_kwargs
from
vllm.utils
import
Device
,
cancel_task_threadsafe
,
cdiv
,
deprecate_kwargs
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine
import
EngineCoreRequest
from
vllm.v1.engine.core_client
import
EngineCoreClient
from
vllm.v1.engine.core_client
import
EngineCoreClient
from
vllm.v1.engine.exceptions
import
EngineDeadError
,
EngineGenerateError
from
vllm.v1.engine.exceptions
import
EngineDeadError
,
EngineGenerateError
...
@@ -219,8 +219,7 @@ class AsyncLLM(EngineClient):
...
@@ -219,8 +219,7 @@ class AsyncLLM(EngineClient):
if
engine_core
:
=
getattr
(
self
,
"engine_core"
,
None
):
if
engine_core
:
=
getattr
(
self
,
"engine_core"
,
None
):
engine_core
.
shutdown
()
engine_core
.
shutdown
()
if
handler
:
=
getattr
(
self
,
"output_handler"
,
None
):
cancel_task_threadsafe
(
getattr
(
self
,
"output_handler"
,
None
))
handler
.
cancel
()
async
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
async
def
get_supported_tasks
(
self
)
->
tuple
[
SupportedTask
,
...]:
return
await
self
.
engine_core
.
get_supported_tasks_async
()
return
await
self
.
engine_core
.
get_supported_tasks_async
()
...
...
vllm/v1/engine/core_client.py
View file @
ccdae737
...
@@ -23,7 +23,8 @@ from vllm.config import VllmConfig
...
@@ -23,7 +23,8 @@ from vllm.config import VllmConfig
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.lora.request
import
LoRARequest
from
vllm.tasks
import
SupportedTask
from
vllm.tasks
import
SupportedTask
from
vllm.utils
import
get_open_port
,
get_open_zmq_inproc_path
,
make_zmq_socket
from
vllm.utils
import
(
cancel_task_threadsafe
,
get_open_port
,
get_open_zmq_inproc_path
,
make_zmq_socket
)
from
vllm.v1.engine
import
(
EngineCoreOutputs
,
EngineCoreRequest
,
from
vllm.v1.engine
import
(
EngineCoreOutputs
,
EngineCoreRequest
,
EngineCoreRequestType
,
EngineCoreRequestType
,
ReconfigureDistributedRequest
,
ReconfigureRankType
,
ReconfigureDistributedRequest
,
ReconfigureRankType
,
...
@@ -342,10 +343,8 @@ class BackgroundResources:
...
@@ -342,10 +343,8 @@ class BackgroundResources:
if
self
.
coordinator
is
not
None
:
if
self
.
coordinator
is
not
None
:
self
.
coordinator
.
close
()
self
.
coordinator
.
close
()
if
self
.
output_queue_task
is
not
None
:
cancel_task_threadsafe
(
self
.
output_queue_task
)
self
.
output_queue_task
.
cancel
()
cancel_task_threadsafe
(
self
.
stats_update_task
)
if
self
.
stats_update_task
is
not
None
:
self
.
stats_update_task
.
cancel
()
# ZMQ context termination can hang if the sockets
# ZMQ context termination can hang if the sockets
# aren't explicitly closed first.
# aren't explicitly closed first.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment