"vscode:/vscode.git/clone" did not exist on "a8f28ecb63ee01c33ea9f6986102136743d47ec2"
Unverified Commit 361ea8d9 authored by Lianmin Zheng, committed by GitHub
Browse files

Fix openai protocols and pass top_k, min_p (#2499)

parent 33c5ff28
...@@ -510,6 +510,8 @@ def v1_generate_request( ...@@ -510,6 +510,8 @@ def v1_generate_request(
"stop": request.stop, "stop": request.stop,
"stop_token_ids": request.stop_token_ids, "stop_token_ids": request.stop_token_ids,
"top_p": request.top_p, "top_p": request.top_p,
"top_k": request.top_k,
"min_p": request.min_p,
"presence_penalty": request.presence_penalty, "presence_penalty": request.presence_penalty,
"frequency_penalty": request.frequency_penalty, "frequency_penalty": request.frequency_penalty,
"repetition_penalty": request.repetition_penalty, "repetition_penalty": request.repetition_penalty,
...@@ -926,6 +928,8 @@ def v1_chat_generate_request( ...@@ -926,6 +928,8 @@ def v1_chat_generate_request(
"stop": stop, "stop": stop,
"stop_token_ids": request.stop_token_ids, "stop_token_ids": request.stop_token_ids,
"top_p": request.top_p, "top_p": request.top_p,
"top_k": request.top_k,
"min_p": request.min_p,
"presence_penalty": request.presence_penalty, "presence_penalty": request.presence_penalty,
"frequency_penalty": request.frequency_penalty, "frequency_penalty": request.frequency_penalty,
"repetition_penalty": request.repetition_penalty, "repetition_penalty": request.repetition_penalty,
......
...@@ -166,17 +166,19 @@ class CompletionRequest(BaseModel): ...@@ -166,17 +166,19 @@ class CompletionRequest(BaseModel):
temperature: float = 1.0 temperature: float = 1.0
top_p: float = 1.0 top_p: float = 1.0
user: Optional[str] = None user: Optional[str] = None
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
# Extra parameters for SRT backend only and will be ignored by OpenAI models. # Extra parameters for SRT backend only and will be ignored by OpenAI models.
json_schema: Optional[str] = None top_k: int = -1
regex: Optional[str] = None min_p: float = 0.0
min_tokens: int = 0 min_tokens: int = 0
regex: Optional[str] = None
json_schema: Optional[str] = None
repetition_penalty: float = 1.0 repetition_penalty: float = 1.0
stop_token_ids: Optional[List[int]] = None stop_token_ids: Optional[List[int]] = None
no_stop_trim: bool = False no_stop_trim: bool = False
ignore_eos: bool = False ignore_eos: bool = False
skip_special_tokens: bool = True skip_special_tokens: bool = True
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
class CompletionResponseChoice(BaseModel): class CompletionResponseChoice(BaseModel):
...@@ -276,14 +278,16 @@ class ChatCompletionRequest(BaseModel): ...@@ -276,14 +278,16 @@ class ChatCompletionRequest(BaseModel):
user: Optional[str] = None user: Optional[str] = None
# Extra parameters for SRT backend only and will be ignored by OpenAI models. # Extra parameters for SRT backend only and will be ignored by OpenAI models.
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None top_k: int = -1
regex: Optional[str] = None min_p: float = 0.0
min_tokens: int = 0 min_tokens: int = 0
regex: Optional[str] = None
repetition_penalty: float = 1.0 repetition_penalty: float = 1.0
stop_token_ids: Optional[List[int]] = None stop_token_ids: Optional[List[int]] = None
no_stop_trim: bool = False no_stop_trim: bool = False
ignore_eos: bool = False ignore_eos: bool = False
skip_special_tokens: bool = True skip_special_tokens: bool = True
lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
class ChatMessage(BaseModel): class ChatMessage(BaseModel):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment