Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
70116459
Unverified
Commit
70116459
authored
Apr 25, 2025
by
Nick Hill
Committed by
GitHub
Apr 25, 2025
Browse files
[BugFix][Frontend] Fix `LLM.chat()` tokenization (#16081)
Signed-off-by:
Nick Hill
<
nhill@redhat.com
>
parent
65e262b9
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
43 additions
and
14 deletions
+43
-14
tests/entrypoints/llm/test_chat.py
tests/entrypoints/llm/test_chat.py
+28
-0
vllm/entrypoints/llm.py
vllm/entrypoints/llm.py
+15
-14
No files found.
tests/entrypoints/llm/test_chat.py
View file @
70116459
...
...
@@ -89,3 +89,31 @@ def test_chat_multi_image(image_urls: list[str]):
}]
outputs
=
llm
.
chat
(
messages
)
assert
len
(
outputs
)
>=
0
def
test_llm_chat_tokenization_no_double_bos
():
"""
LLM.chat() should not add special tokens when using chat templates.
Check we get a single BOS token for llama chat.
"""
llm
=
LLM
(
model
=
"meta-llama/Llama-3.2-1B-Instruct"
,
enforce_eager
=
True
)
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You are a helpful assistant"
},
{
"role"
:
"user"
,
"content"
:
"Hello!"
},
]
outputs
=
llm
.
chat
(
messages
)
assert
len
(
outputs
)
==
1
prompt_token_ids
=
getattr
(
outputs
[
0
],
"prompt_token_ids"
,
None
)
assert
prompt_token_ids
is
not
None
bos_token
=
llm
.
get_tokenizer
().
bos_token_id
# Ensure we have a single BOS
assert
prompt_token_ids
[
0
]
==
bos_token
assert
prompt_token_ids
[
1
]
!=
bos_token
,
"Double BOS"
vllm/entrypoints/llm.py
View file @
70116459
...
...
@@ -251,8 +251,12 @@ class LLM:
self
.
request_counter
=
Counter
()
self
.
default_sampling_params
:
Union
[
dict
[
str
,
Any
],
None
]
=
None
def
get_tokenizer
(
self
)
->
AnyTokenizer
:
return
self
.
llm_engine
.
get_tokenizer_group
().
tokenizer
def
get_tokenizer
(
self
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
)
->
AnyTokenizer
:
return
self
.
llm_engine
.
get_tokenizer_group
().
get_lora_tokenizer
(
lora_request
)
def
set_tokenizer
(
self
,
tokenizer
:
AnyTokenizer
)
->
None
:
tokenizer_group
=
self
.
llm_engine
.
get_tokenizer_group
()
...
...
@@ -712,7 +716,7 @@ class LLM:
cast
(
list
[
ChatCompletionMessageParam
],
messages
)
]
tokenizer
=
self
.
get_tokenizer
()
tokenizer
=
self
.
get_tokenizer
(
lora_request
)
model_config
=
self
.
llm_engine
.
get_model_config
()
resolved_content_format
=
resolve_chat_template_content_format
(
chat_template
,
...
...
@@ -735,9 +739,8 @@ class LLM:
content_format
=
resolved_content_format
,
)
prompt_data
:
Union
[
str
,
list
[
int
]]
if
isinstance
(
tokenizer
,
MistralTokenizer
):
prompt_
data
=
apply_mistral_chat_template
(
prompt_
token_ids
=
apply_mistral_chat_template
(
tokenizer
,
messages
=
msgs
,
chat_template
=
chat_template
,
...
...
@@ -746,7 +749,7 @@ class LLM:
continue_final_message
=
continue_final_message
,
)
else
:
prompt_
data
=
apply_hf_chat_template
(
prompt_
str
=
apply_hf_chat_template
(
tokenizer
,
trust_remote_code
=
model_config
.
trust_remote_code
,
conversation
=
conversation
,
...
...
@@ -755,12 +758,12 @@ class LLM:
add_generation_prompt
=
add_generation_prompt
,
continue_final_message
=
continue_final_message
,
)
# Special tokens are already included in chat templates so
# should not be added by the tokenizer in this case.
prompt_token_ids
=
tokenizer
.
encode
(
prompt_str
,
add_special_tokens
=
False
)
prompt
:
Union
[
TokensPrompt
,
TextPrompt
]
if
is_list_of
(
prompt_data
,
int
):
prompt
=
TokensPrompt
(
prompt_token_ids
=
prompt_data
)
else
:
prompt
=
TextPrompt
(
prompt
=
prompt_data
)
prompt
=
TokensPrompt
(
prompt_token_ids
=
prompt_token_ids
)
if
mm_data
is
not
None
:
prompt
[
"multi_modal_data"
]
=
mm_data
...
...
@@ -1059,8 +1062,6 @@ class LLM:
if
len
(
encoded_output_1
)
==
1
:
encoded_output_1
=
encoded_output_1
*
len
(
encoded_output_2
)
scores
:
list
[
PoolingRequestOutput
]
=
[]
scores
=
_cosine_similarity
(
tokenizer
=
tokenizer
,
embed_1
=
encoded_output_1
,
embed_2
=
encoded_output_2
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment