Unverified Commit ea53c63b authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Expose no_stop_trim and skip_special_tokens in openai api (#2039)

parent a10d5309
...@@ -516,8 +516,9 @@ def v1_generate_request( ...@@ -516,8 +516,9 @@ def v1_generate_request(
"regex": request.regex, "regex": request.regex,
"json_schema": request.json_schema, "json_schema": request.json_schema,
"n": request.n, "n": request.n,
"ignore_eos": request.ignore_eos,
"no_stop_trim": request.no_stop_trim, "no_stop_trim": request.no_stop_trim,
"ignore_eos": request.ignore_eos,
"skip_special_tokens": request.skip_special_tokens,
} }
) )
return_logprobs.append(request.logprobs is not None and request.logprobs > 0) return_logprobs.append(request.logprobs is not None and request.logprobs > 0)
...@@ -928,7 +929,9 @@ def v1_chat_generate_request( ...@@ -928,7 +929,9 @@ def v1_chat_generate_request(
"repetition_penalty": request.repetition_penalty, "repetition_penalty": request.repetition_penalty,
"regex": request.regex, "regex": request.regex,
"n": request.n, "n": request.n,
"no_stop_trim": request.no_stop_trim,
"ignore_eos": request.ignore_eos, "ignore_eos": request.ignore_eos,
"skip_special_tokens": request.skip_special_tokens,
} }
if request.response_format and request.response_format.type == "json_schema": if request.response_format and request.response_format.type == "json_schema":
sampling_params["json_schema"] = convert_json_schema_to_str( sampling_params["json_schema"] = convert_json_schema_to_str(
......
...@@ -36,7 +36,7 @@ class ModelList(BaseModel): ...@@ -36,7 +36,7 @@ class ModelList(BaseModel):
"""Model list consists of model cards.""" """Model list consists of model cards."""
object: str = "list" object: str = "list"
data: List[ModelCard] = [] data: List[ModelCard] = Field(default_factory=list)
class ErrorResponse(BaseModel): class ErrorResponse(BaseModel):
...@@ -143,7 +143,7 @@ class BatchResponse(BaseModel): ...@@ -143,7 +143,7 @@ class BatchResponse(BaseModel):
expired_at: Optional[int] = None expired_at: Optional[int] = None
cancelling_at: Optional[int] = None cancelling_at: Optional[int] = None
cancelled_at: Optional[int] = None cancelled_at: Optional[int] = None
request_counts: dict = {"total": 0, "completed": 0, "failed": 0} request_counts: Optional[dict] = None
metadata: Optional[dict] = None metadata: Optional[dict] = None
...@@ -153,30 +153,31 @@ class CompletionRequest(BaseModel): ...@@ -153,30 +153,31 @@ class CompletionRequest(BaseModel):
model: str model: str
prompt: Union[List[int], List[List[int]], str, List[str]] prompt: Union[List[int], List[List[int]], str, List[str]]
best_of: Optional[int] = None best_of: Optional[int] = None
echo: Optional[bool] = False echo: bool = False
frequency_penalty: Optional[float] = 0.0 frequency_penalty: float = 0.0
logit_bias: Optional[Dict[str, float]] = None logit_bias: Optional[Dict[str, float]] = None
logprobs: Optional[int] = None logprobs: Optional[int] = None
max_tokens: Optional[int] = 16 max_tokens: int = 16
n: int = 1 n: int = 1
presence_penalty: Optional[float] = 0.0 presence_penalty: float = 0.0
seed: Optional[int] = None seed: Optional[int] = None
stop: Optional[Union[str, List[str]]] = Field(default_factory=list) stop: Optional[Union[str, List[str]]] = None
stream: Optional[bool] = False stream: bool = False
stream_options: Optional[StreamOptions] = None stream_options: Optional[StreamOptions] = None
suffix: Optional[str] = None suffix: Optional[str] = None
temperature: Optional[float] = 1.0 temperature: float = 1.0
top_p: Optional[float] = 1.0 top_p: float = 1.0
user: Optional[str] = None user: Optional[str] = None
# Extra parameters for SRT backend only and will be ignored by OpenAI models. # Extra parameters for SRT backend only and will be ignored by OpenAI models.
regex: Optional[str] = None
json_schema: Optional[str] = None json_schema: Optional[str] = None
ignore_eos: bool = False regex: Optional[str] = None
min_tokens: int = 0 min_tokens: int = 0
repetition_penalty: Optional[float] = 1.0 repetition_penalty: float = 1.0
stop_token_ids: Optional[List[int]] = Field(default_factory=list) stop_token_ids: Optional[List[int]] = None
no_stop_trim: Union[bool, List[bool]] = False no_stop_trim: bool = False
ignore_eos: bool = False
skip_special_tokens: bool = True
class CompletionResponseChoice(BaseModel): class CompletionResponseChoice(BaseModel):
...@@ -259,28 +260,30 @@ class ChatCompletionRequest(BaseModel): ...@@ -259,28 +260,30 @@ class ChatCompletionRequest(BaseModel):
# https://platform.openai.com/docs/api-reference/chat/create # https://platform.openai.com/docs/api-reference/chat/create
messages: List[ChatCompletionMessageParam] messages: List[ChatCompletionMessageParam]
model: str model: str
frequency_penalty: Optional[float] = 0.0 frequency_penalty: float = 0.0
logit_bias: Optional[Dict[str, float]] = None logit_bias: Optional[Dict[str, float]] = None
logprobs: Optional[bool] = False logprobs: bool = False
top_logprobs: Optional[int] = None top_logprobs: Optional[int] = None
max_tokens: Optional[int] = None max_tokens: Optional[int] = None
n: Optional[int] = 1 n: int = 1
presence_penalty: Optional[float] = 0.0 presence_penalty: float = 0.0
response_format: Optional[ResponseFormat] = None response_format: Optional[ResponseFormat] = None
seed: Optional[int] = None seed: Optional[int] = None
stop: Optional[Union[str, List[str]]] = Field(default_factory=list) stop: Optional[Union[str, List[str]]] = None
stream: Optional[bool] = False stream: bool = False
stream_options: Optional[StreamOptions] = None stream_options: Optional[StreamOptions] = None
temperature: Optional[float] = 0.7 temperature: float = 0.7
top_p: Optional[float] = 1.0 top_p: float = 1.0
user: Optional[str] = None user: Optional[str] = None
# Extra parameters for SRT backend only and will be ignored by OpenAI models. # Extra parameters for SRT backend only and will be ignored by OpenAI models.
regex: Optional[str] = None regex: Optional[str] = None
min_tokens: Optional[int] = 0 min_tokens: int = 0
repetition_penalty: Optional[float] = 1.0 repetition_penalty: float = 1.0
stop_token_ids: Optional[List[int]] = Field(default_factory=list) stop_token_ids: Optional[List[int]] = None
no_stop_trim: bool = False
ignore_eos: bool = False ignore_eos: bool = False
skip_special_tokens: bool = True
class ChatMessage(BaseModel): class ChatMessage(BaseModel):
......
...@@ -34,13 +34,13 @@ class SamplingParams: ...@@ -34,13 +34,13 @@ class SamplingParams:
frequency_penalty: float = 0.0, frequency_penalty: float = 0.0,
presence_penalty: float = 0.0, presence_penalty: float = 0.0,
repetition_penalty: float = 1.0, repetition_penalty: float = 1.0,
ignore_eos: bool = False,
skip_special_tokens: bool = True,
spaces_between_special_tokens: bool = True, spaces_between_special_tokens: bool = True,
regex: Optional[str] = None, regex: Optional[str] = None,
n: int = 1, n: int = 1,
json_schema: Optional[str] = None, json_schema: Optional[str] = None,
no_stop_trim: bool = False, no_stop_trim: bool = False,
ignore_eos: bool = False,
skip_special_tokens: bool = True,
) -> None: ) -> None:
self.temperature = temperature self.temperature = temperature
self.top_p = top_p self.top_p = top_p
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment