Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
28f350e1
Unverified
Commit
28f350e1
authored
Sep 03, 2025
by
Jakub Smid
Committed by
GitHub
Sep 03, 2025
Browse files
Support add_generation_prompt in embeddings endpoint with chat request (#23931)
Signed-off-by:
biba10
<
jaksmid@seznam.cz
>
parent
51383bd4
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
3 deletions
+9
-3
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+8
-0
vllm/entrypoints/openai/serving_embedding.py
vllm/entrypoints/openai/serving_embedding.py
+1
-3
No files found.
vllm/entrypoints/openai/protocol.py
View file @
28f350e1
...
@@ -1342,6 +1342,14 @@ class EmbeddingChatRequest(OpenAIBaseModel):
...
@@ -1342,6 +1342,14 @@ class EmbeddingChatRequest(OpenAIBaseModel):
truncate_prompt_tokens
:
Optional
[
Annotated
[
int
,
Field
(
ge
=-
1
)]]
=
None
truncate_prompt_tokens
:
Optional
[
Annotated
[
int
,
Field
(
ge
=-
1
)]]
=
None
# --8<-- [start:chat-embedding-extra-params]
# --8<-- [start:chat-embedding-extra-params]
add_generation_prompt
:
bool
=
Field
(
default
=
False
,
description
=
(
"If true, the generation prompt will be added to the chat template. "
"This is a parameter used by chat template in tokenizer config of the "
"model."
),
)
add_special_tokens
:
bool
=
Field
(
add_special_tokens
:
bool
=
Field
(
default
=
False
,
default
=
False
,
description
=
(
description
=
(
...
...
vllm/entrypoints/openai/serving_embedding.py
View file @
28f350e1
...
@@ -93,9 +93,7 @@ class EmbeddingMixin(OpenAIServing):
...
@@ -93,9 +93,7 @@ class EmbeddingMixin(OpenAIServing):
or
ctx
.
chat_template
,
or
ctx
.
chat_template
,
chat_template_content_format
=
ctx
.
chat_template_content_format
=
ctx
.
chat_template_content_format
,
chat_template_content_format
,
# In embedding requests, we are not generating tokens,
add_generation_prompt
=
ctx
.
request
.
add_generation_prompt
,
# so there is no need to append extra tokens to the input
add_generation_prompt
=
False
,
continue_final_message
=
False
,
continue_final_message
=
False
,
add_special_tokens
=
ctx
.
request
.
add_special_tokens
,
add_special_tokens
=
ctx
.
request
.
add_special_tokens
,
)
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment