Commit 0cddb99e authored by Ceng23333
Browse files

remove usage of stop_token_ids


Signed-off-by: Ceng23333 <441651826@qq.com>
parent bc93c648
@@ -320,12 +320,6 @@ class LLMEngine:
             req.finish_reason = FinishReason.STOP_STRING
             return True
-        # Check stop token IDs
-        stop_token_ids = req.sampling_params.stop_token_ids or []
-        if stop_token_ids and token_id in stop_token_ids:
-            req.finish_reason = FinishReason.STOP_STRING
-            return True
         return False

     def tokenize(self, text: str) -> List[int]:
...
@@ -15,7 +15,7 @@ class SamplingParams:
     top_k: int = 1
     max_tokens: Optional[int] = None
     stop: Optional[List[str]] = None
-    stop_token_ids: Optional[List[int]] = None
+    stop_token_ids: Optional[List[int]] = None  # Placeholder for future usage, not currently handled

     def __post_init__(self):
         if self.stop is None:
...
@@ -233,9 +233,6 @@ class InferenceServer:
         if isinstance(stop, str):
             stop = [stop]
-        stop_token_ids = pick("stop_token_ids", None)
-        if isinstance(stop_token_ids, int):
-            stop_token_ids = [stop_token_ids]

         return SamplingParams(
             temperature=float(pick("temperature", self.temperature)),
@@ -243,7 +240,6 @@ class InferenceServer:
             top_k=int(pick("top_k", self.top_k)),
             max_tokens=int(max_tokens) if max_tokens is not None else None,
             stop=stop,
-            stop_token_ids=stop_token_ids,
         )

     async def _stream_chat(self, request_id: str, data: dict, http_request: Request):
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment