Unverified commit 0150a106, authored by bofeng huang, committed by GitHub
Browse files

[Frontend] OpenAI API server: Do not add bos token by default when encoding (#4688)

parent 8e7fb5d4
...@@ -158,7 +158,7 @@ class OpenAIServingChat(OpenAIServing): ...@@ -158,7 +158,7 @@ class OpenAIServingChat(OpenAIServing):
try: try:
# Tokenize/detokenize depending on prompt format (string/token list) # Tokenize/detokenize depending on prompt format (string/token list)
prompt_ids, prompt_text = self._validate_prompt_and_tokenize( prompt_ids, prompt_text = self._validate_prompt_and_tokenize(
request, prompt=prompt) request, prompt=prompt, add_special_tokens=False)
sampling_params = request.to_sampling_params() sampling_params = request.to_sampling_params()
lora_request = self._maybe_get_lora(request) lora_request = self._maybe_get_lora(request)
decoding_config = await self.engine.get_decoding_config() decoding_config = await self.engine.get_decoding_config()
......
import json import json
from dataclasses import dataclass from dataclasses import dataclass
from http import HTTPStatus from http import HTTPStatus
from typing import Dict, List, Optional, Tuple, Union from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import Field from pydantic import Field
from typing_extensions import Annotated from typing_extensions import Annotated
...@@ -170,8 +170,9 @@ class OpenAIServing: ...@@ -170,8 +170,9 @@ class OpenAIServing:
EmbeddingRequest], EmbeddingRequest],
prompt: Optional[str] = None, prompt: Optional[str] = None,
prompt_ids: Optional[List[int]] = None, prompt_ids: Optional[List[int]] = None,
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None truncate_prompt_tokens: Optional[Annotated[int,
) -> Tuple[List[int], str]: Field(ge=1)]] = None,
add_special_tokens: bool = True) -> Tuple[List[int], str]:
if not (prompt or prompt_ids): if not (prompt or prompt_ids):
raise ValueError("Either prompt or prompt_ids should be provided.") raise ValueError("Either prompt or prompt_ids should be provided.")
if (prompt and prompt_ids): if (prompt and prompt_ids):
...@@ -179,10 +180,19 @@ class OpenAIServing: ...@@ -179,10 +180,19 @@ class OpenAIServing:
"Only one of prompt or prompt_ids should be provided.") "Only one of prompt or prompt_ids should be provided.")
if prompt_ids is None: if prompt_ids is None:
tokenizer_kwargs = {} if truncate_prompt_tokens is None else { # When using OpenAIServingChat for chat completions, the
# special tokens (e.g., BOS) have already been added by the
# chat template. Therefore, we do not need to add them again.
# Set add_special_tokens to False to avoid adding the BOS tokens
# again.
tokenizer_kwargs: Dict[str, Any] = {
"add_special_tokens": add_special_tokens
}
if truncate_prompt_tokens is not None:
tokenizer_kwargs.update({
"truncation": True, "truncation": True,
"max_length": truncate_prompt_tokens, "max_length": truncate_prompt_tokens,
} })
input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids
elif truncate_prompt_tokens is not None: elif truncate_prompt_tokens is not None:
input_ids = prompt_ids[-truncate_prompt_tokens:] input_ids = prompt_ids[-truncate_prompt_tokens:]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment