Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f6b50405
Unverified
Commit
f6b50405
authored
Aug 15, 2025
by
Nick Hill
Committed by
GitHub
Aug 16, 2025
Browse files
[Frontend] Avoid list copies in `serving_chat.py` (#22947)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
fbd88728
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
15 deletions
+16
-15
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+15
-14
vllm/reasoning/abs_reasoning_parsers.py
vllm/reasoning/abs_reasoning_parsers.py
+1
-1
No files found.
vllm/entrypoints/openai/serving_chat.py
View file @
f6b50405
...
@@ -50,6 +50,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
...
@@ -50,6 +50,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
from
vllm.transformers_utils.tokenizers
import
(
maybe_serialize_tool_calls
,
from
vllm.transformers_utils.tokenizers
import
(
maybe_serialize_tool_calls
,
truncate_tool_call_ids
,
truncate_tool_call_ids
,
validate_request_params
)
validate_request_params
)
from
vllm.utils
import
as_list
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -670,10 +671,10 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -670,10 +671,10 @@ class OpenAIServingChat(OpenAIServing):
# avoid the None + list error.
# avoid the None + list error.
if
previous_token_ids
:
if
previous_token_ids
:
current_token_ids
=
previous_token_ids
+
list
(
current_token_ids
=
previous_token_ids
+
as_
list
(
output
.
token_ids
)
output
.
token_ids
)
else
:
else
:
current_token_ids
=
list
(
output
.
token_ids
)
current_token_ids
=
as_
list
(
output
.
token_ids
)
if
self
.
use_harmony
:
if
self
.
use_harmony
:
if
is_final
:
if
is_final
:
...
@@ -703,11 +704,10 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -703,11 +704,10 @@ class OpenAIServingChat(OpenAIServing):
# set reasoning status to end.
# set reasoning status to end.
# Only keep 'content', remove 'reasoning_content'.
# Only keep 'content', remove 'reasoning_content'.
if
reasoning_parser
.
is_reasoning_end
(
if
reasoning_parser
.
is_reasoning_end
(
list
(
output
.
token_ids
))
or
\
as_list
(
output
.
token_ids
))
or
(
(
res
.
prompt_token_ids
and
res
.
prompt_token_ids
reasoning_parser
.
is_reasoning_end
(
and
reasoning_parser
.
is_reasoning_end
(
list
(
res
.
prompt_token_ids
)
res
.
prompt_token_ids
)):
)):
reasoning_end_arr
[
i
]
=
True
reasoning_end_arr
[
i
]
=
True
if
delta_message
and
delta_message
.
content
:
if
delta_message
and
delta_message
.
content
:
# This needs to be added to the next `delta_text`
# This needs to be added to the next `delta_text`
...
@@ -771,6 +771,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -771,6 +771,7 @@ class OpenAIServingChat(OpenAIServing):
assert
reasoning_parser
is
not
None
assert
reasoning_parser
is
not
None
assert
added_content_delta_arr
is
not
None
assert
added_content_delta_arr
is
not
None
assert
reasoning_end_arr
is
not
None
assert
reasoning_end_arr
is
not
None
output_token_ids
=
as_list
(
output
.
token_ids
)
if
not
reasoning_end_arr
[
i
]:
if
not
reasoning_end_arr
[
i
]:
delta_message
=
(
delta_message
=
(
reasoning_parser
.
reasoning_parser
.
...
@@ -780,7 +781,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -780,7 +781,7 @@ class OpenAIServingChat(OpenAIServing):
delta_text
,
delta_text
,
previous_token_ids
,
previous_token_ids
,
current_token_ids
,
current_token_ids
,
output
.
token_ids
,
output
_
token_ids
,
))
))
# When encountering think end id in prompt_token_ids
# When encountering think end id in prompt_token_ids
# i.e. {"enable_thinking": False},
# i.e. {"enable_thinking": False},
...
@@ -789,9 +790,9 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -789,9 +790,9 @@ class OpenAIServingChat(OpenAIServing):
# to 'reasoning_content'.
# to 'reasoning_content'.
if
res
.
prompt_token_ids
and
\
if
res
.
prompt_token_ids
and
\
reasoning_parser
.
is_reasoning_end
(
reasoning_parser
.
is_reasoning_end
(
list
(
res
.
prompt_token_ids
)
)
:
res
.
prompt_token_ids
):
reasoning_end_arr
[
i
]
=
True
reasoning_end_arr
[
i
]
=
True
current_token_ids
=
list
(
output
.
token_ids
)
current_token_ids
=
output
_
token_ids
if
delta_message
and
delta_message
.
content
:
if
delta_message
and
delta_message
.
content
:
current_text
=
delta_message
.
content
current_text
=
delta_message
.
content
delta_message
.
content
=
None
delta_message
.
content
=
None
...
@@ -802,11 +803,11 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -802,11 +803,11 @@ class OpenAIServingChat(OpenAIServing):
# Remove the text and token ids related
# Remove the text and token ids related
# to 'reasoning_content'.
# to 'reasoning_content'.
if
reasoning_parser
.
is_reasoning_end
(
if
reasoning_parser
.
is_reasoning_end
(
list
(
output
.
token_ids
)
)
:
output
_
token_ids
):
reasoning_end_arr
[
i
]
=
True
reasoning_end_arr
[
i
]
=
True
current_token_ids
=
\
current_token_ids
=
\
reasoning_parser
.
extract_content_ids
(
reasoning_parser
.
extract_content_ids
(
list
(
output
.
token_ids
)
)
output
_
token_ids
)
if
delta_message
and
delta_message
.
content
:
if
delta_message
and
delta_message
.
content
:
current_text
=
delta_message
.
content
current_text
=
delta_message
.
content
delta_message
.
content
=
None
delta_message
.
content
=
None
...
@@ -815,7 +816,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -815,7 +816,7 @@ class OpenAIServingChat(OpenAIServing):
# handle tool calls only after reasoning is done,
# handle tool calls only after reasoning is done,
else
:
else
:
delta_token_ids
=
list
(
output
.
token_ids
)
delta_token_ids
=
output
_
token_ids
# First time to tool call,
# First time to tool call,
# add the remaining text and token ids
# add the remaining text and token ids
# to delta from previous
# to delta from previous
...
@@ -899,7 +900,7 @@ class OpenAIServingChat(OpenAIServing):
...
@@ -899,7 +900,7 @@ class OpenAIServingChat(OpenAIServing):
self
.
request_logger
.
log_outputs
(
self
.
request_logger
.
log_outputs
(
request_id
=
request_id
,
request_id
=
request_id
,
outputs
=
delta_content
,
outputs
=
delta_content
,
output_token_ids
=
list
(
output
.
token_ids
),
output_token_ids
=
as_
list
(
output
.
token_ids
),
finish_reason
=
output
.
finish_reason
,
finish_reason
=
output
.
finish_reason
,
is_streaming
=
True
,
is_streaming
=
True
,
delta
=
True
,
delta
=
True
,
...
...
vllm/reasoning/abs_reasoning_parsers.py
View file @
f6b50405
...
@@ -44,7 +44,7 @@ class ReasoningParser:
...
@@ -44,7 +44,7 @@ class ReasoningParser:
return
self
.
model_tokenizer
.
get_vocab
()
return
self
.
model_tokenizer
.
get_vocab
()
@
abstractmethod
@
abstractmethod
def
is_reasoning_end
(
self
,
input_ids
:
Sequence
[
int
])
->
bool
:
def
is_reasoning_end
(
self
,
input_ids
:
list
[
int
])
->
bool
:
"""
"""
Check if the reasoning content ends in the input_ids.
Check if the reasoning content ends in the input_ids.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment