Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a178a0b4
Unverified
Commit
a178a0b4
authored
Nov 24, 2025
by
Nick Hill
Committed by
GitHub
Nov 25, 2025
Browse files
[BugFix] Fix duplicate id tool-call race condition (#29355)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
b8328b49
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
7 deletions
+16
-7
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+9
-4
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+7
-3
No files found.
vllm/entrypoints/openai/serving_chat.py
View file @
a178a0b4
...
...
@@ -273,6 +273,11 @@ class OpenAIServingChat(OpenAIServing):
try
:
for
i
,
engine_prompt
in
enumerate
(
engine_prompts
):
prompt_text
,
_
,
_
=
self
.
_get_prompt_components
(
request_prompts
[
i
])
# If we are creating sub requests for multiple prompts, ensure that they
# have unique request ids.
sub_request_id
=
(
request_id
if
len
(
engine_prompts
)
==
1
else
f
"
{
request_id
}
_
{
i
}
"
)
if
self
.
default_sampling_params
is
None
:
self
.
default_sampling_params
=
{}
...
...
@@ -301,7 +306,7 @@ class OpenAIServingChat(OpenAIServing):
)
self
.
_log_inputs
(
request_id
,
sub_
request_id
,
request_prompts
[
i
],
params
=
sampling_params
,
lora_request
=
lora_request
,
...
...
@@ -316,14 +321,14 @@ class OpenAIServingChat(OpenAIServing):
if
isinstance
(
sampling_params
,
BeamSearchParams
):
generator
=
self
.
beam_search
(
prompt
=
engine_prompt
,
request_id
=
request_id
,
request_id
=
sub_
request_id
,
params
=
sampling_params
,
lora_request
=
lora_request
,
trace_headers
=
trace_headers
,
)
else
:
engine_request
,
tokenization_kwargs
=
await
self
.
_process_inputs
(
request_id
,
sub_
request_id
,
engine_prompt
,
sampling_params
,
lora_request
=
lora_request
,
...
...
@@ -334,7 +339,7 @@ class OpenAIServingChat(OpenAIServing):
generator
=
self
.
engine_client
.
generate
(
engine_request
,
sampling_params
,
request_id
,
sub_
request_id
,
lora_request
=
lora_request
,
trace_headers
=
trace_headers
,
priority
=
request
.
priority
,
...
...
vllm/entrypoints/openai/serving_engine.py
View file @
a178a0b4
...
...
@@ -1242,16 +1242,19 @@ class OpenAIServing:
):
prompt_text
,
_
,
_
=
self
.
_get_prompt_components
(
request_prompt
)
orig_priority
=
priority
sub_request
=
0
while
True
:
# Ensure that each sub-request has a unique request id.
sub_request_id
=
f
"
{
request_id
}
_
{
sub_request
}
"
self
.
_log_inputs
(
request_id
,
sub_
request_id
,
request_prompt
,
params
=
sampling_params
,
lora_request
=
lora_request
,
)
trace_headers
=
kwargs
.
get
(
"trace_headers"
)
engine_request
,
tokenization_kwargs
=
await
self
.
_process_inputs
(
request_id
,
sub_
request_id
,
engine_prompt
,
sampling_params
,
lora_request
=
lora_request
,
...
...
@@ -1262,7 +1265,7 @@ class OpenAIServing:
generator
=
self
.
engine_client
.
generate
(
engine_request
,
sampling_params
,
request_id
,
sub_
request_id
,
lora_request
=
lora_request
,
priority
=
priority
,
prompt_text
=
prompt_text
,
...
...
@@ -1295,6 +1298,7 @@ class OpenAIServing:
sampling_params
.
max_tokens
=
self
.
max_model_len
-
len
(
prompt_token_ids
)
# OPTIMIZATION
priority
=
orig_priority
-
1
sub_request
+=
1
def
_get_prompt_components
(
self
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment