Commit 0cddb99e authored by Ceng23333
Browse files

remove usage of stop_token_ids


Signed-off-by: Ceng23333 <441651826@qq.com>
parent bc93c648
......@@ -320,12 +320,6 @@ class LLMEngine:
req.finish_reason = FinishReason.STOP_STRING
return True
# Check stop token IDs
stop_token_ids = req.sampling_params.stop_token_ids or []
if stop_token_ids and token_id in stop_token_ids:
req.finish_reason = FinishReason.STOP_STRING
return True
return False
def tokenize(self, text: str) -> List[int]:
......
......@@ -15,7 +15,7 @@ class SamplingParams:
top_k: int = 1
max_tokens: Optional[int] = None
stop: Optional[List[str]] = None
stop_token_ids: Optional[List[int]] = None
stop_token_ids: Optional[List[int]] = None # Placeholder for future usage, not currently handled
def __post_init__(self):
if self.stop is None:
......
......@@ -233,9 +233,6 @@ class InferenceServer:
if isinstance(stop, str):
stop = [stop]
stop_token_ids = pick("stop_token_ids", None)
if isinstance(stop_token_ids, int):
stop_token_ids = [stop_token_ids]
return SamplingParams(
temperature=float(pick("temperature", self.temperature)),
......@@ -243,7 +240,6 @@ class InferenceServer:
top_k=int(pick("top_k", self.top_k)),
max_tokens=int(max_tokens) if max_tokens is not None else None,
stop=stop,
stop_token_ids=stop_token_ids,
)
async def _stream_chat(self, request_id: str, data: dict, http_request: Request):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment