Unverified Commit 05ee2192 authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

Support `max_completion_tokens` for OpenAIChatCompletions (#5857)

parent dcae1fb2
...@@ -1099,7 +1099,7 @@ def v1_chat_generate_request( ...@@ -1099,7 +1099,7 @@ def v1_chat_generate_request(
sampling_params = { sampling_params = {
"temperature": request.temperature, "temperature": request.temperature,
"max_new_tokens": request.max_tokens, "max_new_tokens": request.max_tokens or request.max_completion_tokens,
"min_new_tokens": request.min_tokens, "min_new_tokens": request.min_tokens,
"stop": stop, "stop": stop,
"stop_token_ids": request.stop_token_ids, "stop_token_ids": request.stop_token_ids,
......
...@@ -320,7 +320,16 @@ class ChatCompletionRequest(BaseModel): ...@@ -320,7 +320,16 @@ class ChatCompletionRequest(BaseModel):
logit_bias: Optional[Dict[str, float]] = None logit_bias: Optional[Dict[str, float]] = None
logprobs: bool = False logprobs: bool = False
top_logprobs: Optional[int] = None top_logprobs: Optional[int] = None
max_tokens: Optional[int] = None max_tokens: Optional[int] = Field(
default=None,
deprecated="max_tokens is deprecated in favor of the max_completion_tokens field",
description="The maximum number of tokens that can be generated in the chat completion. ",
)
max_completion_tokens: Optional[int] = Field(
default=None,
description="The maximum number of completion tokens for a chat completion request, "
"including visible output tokens and reasoning tokens. Input tokens are not included. ",
)
n: int = 1 n: int = 1
presence_penalty: float = 0.0 presence_penalty: float = 0.0
response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None response_format: Optional[Union[ResponseFormat, StructuralTagResponseFormat]] = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment