Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b971f915
Unverified
Commit
b971f915
authored
Sep 11, 2025
by
Nick Hill
Committed by
GitHub
Sep 11, 2025
Browse files
[BugFix] Fix tokenize asyncio task leak (#24677)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
c733bd5e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
28 deletions
+30
-28
vllm/entrypoints/renderer.py
vllm/entrypoints/renderer.py
+30
-28
No files found.
vllm/entrypoints/renderer.py
View file @
b971f915
...
@@ -168,8 +168,8 @@ class BaseRenderer(ABC):
...
@@ -168,8 +168,8 @@ class BaseRenderer(ABC):
if
isinstance
(
prompt_embeds
,
list
):
if
isinstance
(
prompt_embeds
,
list
):
return
[
_load_and_validate_embed
(
embed
)
for
embed
in
prompt_embeds
]
return
[
_load_and_validate_embed
(
embed
)
for
embed
in
prompt_embeds
]
else
:
return
[
_load_and_validate_embed
(
prompt_embeds
)]
return
[
_load_and_validate_embed
(
prompt_embeds
)]
class
CompletionRenderer
(
BaseRenderer
):
class
CompletionRenderer
(
BaseRenderer
):
...
@@ -182,7 +182,7 @@ class CompletionRenderer(BaseRenderer):
...
@@ -182,7 +182,7 @@ class CompletionRenderer(BaseRenderer):
AsyncMicrobatchTokenizer
]]
=
None
,
AsyncMicrobatchTokenizer
]]
=
None
,
):
):
super
().
__init__
(
model_config
,
tokenizer
)
super
().
__init__
(
model_config
,
tokenizer
)
self
.
async_tokenizer_pool
=
async_tokenizer_pool
or
{}
self
.
async_tokenizer_pool
=
async_tokenizer_pool
self
.
async_tokenizer
:
Optional
[
AsyncMicrobatchTokenizer
]
=
None
self
.
async_tokenizer
:
Optional
[
AsyncMicrobatchTokenizer
]
=
None
async
def
render_prompt
(
async
def
render_prompt
(
...
@@ -208,23 +208,21 @@ class CompletionRenderer(BaseRenderer):
...
@@ -208,23 +208,21 @@ class CompletionRenderer(BaseRenderer):
for
prompt_input
in
batch_inputs
:
for
prompt_input
in
batch_inputs
:
if
prompt_input
[
"is_tokens"
]
is
True
:
if
prompt_input
[
"is_tokens"
]
is
True
:
# Token input
# Token input
detokenize_task
=
asyncio
.
create_task
(
# Note: detokenization is needed when echo is enabled,
# Note: detokenization is needed when echo is enabled,
# where the input token IDs are decoded back to text.
# where the input token IDs are decoded back to text.
task
=
self
.
_maybe_detokenize
(
prompt_input
[
"content"
],
self
.
_maybe_detokenize
(
prompt_input
[
"content"
],
config
.
max_length
,
config
.
max_length
,
truncate_prompt_tokens
,
truncate_prompt_tokens
,
config
.
cache_salt
,
config
.
cache_salt
,
config
.
needs_detokenization
)
config
.
needs_detokenization
))
tasks
.
append
(
detokenize_task
)
else
:
else
:
# Text input
# Text input
t
okenize_task
=
asyncio
.
create_task
(
t
ask
=
self
.
_tokenize
(
prompt_input
[
"content"
],
self
.
_tokenize
(
prompt_input
[
"content"
],
config
.
max_length
,
config
.
max_length
,
truncate_prompt_tokens
,
truncate_prompt_tokens
,
config
.
add_special_tokens
,
config
.
add_special_tokens
,
config
.
cache_salt
)
)
config
.
cache_salt
)
tasks
.
append
(
tokenize_
task
)
tasks
.
append
(
task
)
# Wait for all text tokenization to finish
# Wait for all text tokenization to finish
if
tasks
:
if
tasks
:
...
@@ -356,20 +354,24 @@ class CompletionRenderer(BaseRenderer):
...
@@ -356,20 +354,24 @@ class CompletionRenderer(BaseRenderer):
def
_get_async_tokenizer
(
self
)
->
AsyncMicrobatchTokenizer
:
def
_get_async_tokenizer
(
self
)
->
AsyncMicrobatchTokenizer
:
"""Get or create async tokenizer using shared pool."""
"""Get or create async tokenizer using shared pool."""
if
self
.
async_tokenizer
is
not
None
:
async_tokenizer
=
self
.
async_tokenizer
return
self
.
async_tokenizer
if
async_tokenizer
is
not
None
:
return
async_tokenizer
tokenizer
=
self
.
tokenizer
if
self
.
tokenizer
is
None
:
if
self
.
tokenizer
is
None
:
raise
ValueError
(
raise
ValueError
(
"No tokenizer available for text input processing"
)
"No tokenizer available for text input processing"
)
# Check shared pool first
if
self
.
async_tokenizer_pool
is
None
:
if
self
.
tokenizer
in
self
.
async_tokenizer_pool
:
async_tokenizer
=
AsyncMicrobatchTokenizer
(
tokenizer
)
return
self
.
async_tokenizer_pool
[
self
.
tokenizer
]
else
:
async_tokenizer
=
self
.
async_tokenizer_pool
.
get
(
tokenizer
)
# Create new async tokenizer and add to pool
if
async_tokenizer
is
None
:
self
.
async_tokenizer
=
AsyncMicrobatchTokenizer
(
self
.
tokenizer
)
async_tokenizer
=
AsyncMicrobatchTokenizer
(
tokenizer
)
self
.
async_tokenizer_pool
[
self
.
tokenizer
]
=
self
.
async_tokenizer
self
.
async_tokenizer_pool
[
tokenizer
]
=
async_tokenizer
return
self
.
async_tokenizer
self
.
async_tokenizer
=
async_tokenizer
return
async_tokenizer
def
_create_tokens_prompt
(
def
_create_tokens_prompt
(
self
,
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment