Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2c37540a
Unverified
Commit
2c37540a
authored
Jul 02, 2024
by
danieljannai21
Committed by
GitHub
Jul 01, 2024
Browse files
[Frontend] Add template related params to request (#5709)
parent
3476ed08
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
30 additions
and
1 deletion
+30
-1
requirements-common.txt
requirements-common.txt
+1
-1
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+21
-0
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+8
-0
No files found.
requirements-common.txt
View file @
2c37540a
...
...
@@ -6,7 +6,7 @@ numpy < 2.0.0
requests
tqdm
py-cpuinfo
transformers >= 4.42.0 # Required for Gemma 2.
transformers >= 4.42.0 # Required for Gemma 2
and for additional chat template parameters
.
tokenizers >= 0.19.1 # Required for Llama 3.
fastapi
aiohttp
...
...
vllm/entrypoints/openai/protocol.py
View file @
2c37540a
...
...
@@ -190,6 +190,27 @@ class ChatCompletionRequest(OpenAIBaseModel):
"special tokens so this should be set to False (as is the "
"default)."
),
)
documents
:
Optional
[
List
[
Dict
[
str
,
str
]]]
=
Field
(
default
=
None
,
description
=
(
"A list of dicts representing documents that will be accessible to "
"the model if it is performing RAG (retrieval-augmented generation)."
" If the template does not support RAG, this argument will have no "
"effect. We recommend that each document should be a dict containing "
"
\"
title
\"
and
\"
text
\"
keys."
),
)
chat_template
:
Optional
[
str
]
=
Field
(
default
=
None
,
description
=
(
"A Jinja template to use for this conversion. "
"If this is not passed, the model's default chat template will be "
"used instead."
),
)
chat_template_kwargs
:
Optional
[
Dict
[
str
,
Any
]]
=
Field
(
default
=
None
,
description
=
(
"Additional kwargs to pass to the template renderer. "
"Will be accessible by the chat template."
),
)
include_stop_str_in_output
:
Optional
[
bool
]
=
Field
(
default
=
False
,
description
=
(
...
...
vllm/entrypoints/openai/serving_chat.py
View file @
2c37540a
...
...
@@ -218,10 +218,18 @@ class OpenAIServingChat(OpenAIServing):
conversation
.
extend
(
chat_parsed_result
.
messages
)
image_futures
.
extend
(
chat_parsed_result
.
image_futures
)
tool_dicts
=
None
if
request
.
tools
is
None
else
[
tool
.
model_dump
()
for
tool
in
request
.
tools
]
prompt
=
self
.
tokenizer
.
apply_chat_template
(
conversation
=
conversation
,
tokenize
=
False
,
add_generation_prompt
=
request
.
add_generation_prompt
,
tools
=
tool_dicts
,
documents
=
request
.
documents
,
chat_template
=
request
.
chat_template
,
**
(
request
.
chat_template_kwargs
or
{}),
)
except
Exception
as
e
:
logger
.
error
(
"Error in applying chat template from request: %s"
,
e
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment