Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0150a106
Unverified
Commit
0150a106
authored
May 17, 2024
by
bofeng huang
Committed by
GitHub
May 16, 2024
Browse files
[Frontend] OpenAI API server: Do not add bos token by default when encoding (#4688)
parent
8e7fb5d4
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
12 deletions
+22
-12
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_chat.py
+1
-1
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_engine.py
+21
-11
No files found.
vllm/entrypoints/openai/serving_chat.py
View file @
0150a106
...
...
@@ -158,7 +158,7 @@ class OpenAIServingChat(OpenAIServing):
try
:
# Tokenize/detokenize depending on prompt format (string/token list)
prompt_ids
,
prompt_text
=
self
.
_validate_prompt_and_tokenize
(
request
,
prompt
=
prompt
)
request
,
prompt
=
prompt
,
add_special_tokens
=
False
)
sampling_params
=
request
.
to_sampling_params
()
lora_request
=
self
.
_maybe_get_lora
(
request
)
decoding_config
=
await
self
.
engine
.
get_decoding_config
()
...
...
vllm/entrypoints/openai/serving_engine.py
View file @
0150a106
import
json
from
dataclasses
import
dataclass
from
http
import
HTTPStatus
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
,
Union
from
pydantic
import
Field
from
typing_extensions
import
Annotated
...
...
@@ -170,8 +170,9 @@ class OpenAIServing:
EmbeddingRequest
],
prompt
:
Optional
[
str
]
=
None
,
prompt_ids
:
Optional
[
List
[
int
]]
=
None
,
truncate_prompt_tokens
:
Optional
[
Annotated
[
int
,
Field
(
ge
=
1
)]]
=
None
)
->
Tuple
[
List
[
int
],
str
]:
truncate_prompt_tokens
:
Optional
[
Annotated
[
int
,
Field
(
ge
=
1
)]]
=
None
,
add_special_tokens
:
bool
=
True
)
->
Tuple
[
List
[
int
],
str
]:
if
not
(
prompt
or
prompt_ids
):
raise
ValueError
(
"Either prompt or prompt_ids should be provided."
)
if
(
prompt
and
prompt_ids
):
...
...
@@ -179,10 +180,19 @@ class OpenAIServing:
"Only one of prompt or prompt_ids should be provided."
)
if
prompt_ids
is
None
:
tokenizer_kwargs
=
{}
if
truncate_prompt_tokens
is
None
else
{
# When using OpenAIServingChat for chat completions, the
# special tokens (e.g., BOS) have already been added by the
# chat template. Therefore, we do not need to add them again.
# Set add_special_tokens to False to avoid adding the BOS tokens
# again.
tokenizer_kwargs
:
Dict
[
str
,
Any
]
=
{
"add_special_tokens"
:
add_special_tokens
}
if
truncate_prompt_tokens
is
not
None
:
tokenizer_kwargs
.
update
({
"truncation"
:
True
,
"max_length"
:
truncate_prompt_tokens
,
}
})
input_ids
=
self
.
tokenizer
(
prompt
,
**
tokenizer_kwargs
).
input_ids
elif
truncate_prompt_tokens
is
not
None
:
input_ids
=
prompt_ids
[
-
truncate_prompt_tokens
:]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment