Commit 60dc62dc (unverified), authored by Roy, committed by GitHub
Browse files

add custom server params (#1868)

parent 0f90effc
...@@ -253,8 +253,10 @@ async def create_chat_completion(request: ChatCompletionRequest, ...@@ -253,8 +253,10 @@ async def create_chat_completion(request: ChatCompletionRequest,
n=request.n, n=request.n,
presence_penalty=request.presence_penalty, presence_penalty=request.presence_penalty,
frequency_penalty=request.frequency_penalty, frequency_penalty=request.frequency_penalty,
repetition_penalty=request.repetition_penalty,
temperature=request.temperature, temperature=request.temperature,
top_p=request.top_p, top_p=request.top_p,
min_p=request.min_p,
stop=request.stop, stop=request.stop,
stop_token_ids=request.stop_token_ids, stop_token_ids=request.stop_token_ids,
max_tokens=request.max_tokens, max_tokens=request.max_tokens,
...@@ -497,9 +499,11 @@ async def create_completion(request: CompletionRequest, raw_request: Request): ...@@ -497,9 +499,11 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
best_of=request.best_of, best_of=request.best_of,
presence_penalty=request.presence_penalty, presence_penalty=request.presence_penalty,
frequency_penalty=request.frequency_penalty, frequency_penalty=request.frequency_penalty,
repetition_penalty=request.repetition_penalty,
temperature=request.temperature, temperature=request.temperature,
top_p=request.top_p, top_p=request.top_p,
top_k=request.top_k, top_k=request.top_k,
min_p=request.min_p,
stop=request.stop, stop=request.stop,
stop_token_ids=request.stop_token_ids, stop_token_ids=request.stop_token_ids,
ignore_eos=request.ignore_eos, ignore_eos=request.ignore_eos,
......
...@@ -75,6 +75,8 @@ class ChatCompletionRequest(BaseModel): ...@@ -75,6 +75,8 @@ class ChatCompletionRequest(BaseModel):
spaces_between_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True
add_generation_prompt: Optional[bool] = True add_generation_prompt: Optional[bool] = True
echo: Optional[bool] = False echo: Optional[bool] = False
repetition_penalty: Optional[float] = 1.0
min_p: Optional[float] = 0.0
class CompletionRequest(BaseModel): class CompletionRequest(BaseModel):
...@@ -102,6 +104,8 @@ class CompletionRequest(BaseModel): ...@@ -102,6 +104,8 @@ class CompletionRequest(BaseModel):
stop_token_ids: Optional[List[int]] = Field(default_factory=list) stop_token_ids: Optional[List[int]] = Field(default_factory=list)
skip_special_tokens: Optional[bool] = True skip_special_tokens: Optional[bool] = True
spaces_between_special_tokens: Optional[bool] = True spaces_between_special_tokens: Optional[bool] = True
repetition_penalty: Optional[float] = 1.0
min_p: Optional[float] = 0.0
class LogProbs(BaseModel): class LogProbs(BaseModel):
......
...@@ -149,6 +149,7 @@ class SamplingParams: ...@@ -149,6 +149,7 @@ class SamplingParams:
# Zero temperature means greedy sampling. # Zero temperature means greedy sampling.
self.top_p = 1.0 self.top_p = 1.0
self.top_k = -1 self.top_k = -1
self.min_p = 0.0
self._verify_greedy_sampling() self._verify_greedy_sampling()
def _verify_args(self) -> None: def _verify_args(self) -> None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment