bugfix: Fix output_ids extraction in detokenizer_manager (#9047)

a6452b71 · Chang Su · GitHub · f4ae50e9 · a6452b71 · a6452b71
Unverified commit a6452b71, authored Aug 11, 2025 by Chang Su; committed by GitHub on Aug 11, 2025.
Showing 2 changed files with 2 additions and 19 deletions

python/sglang/srt/entrypoints/context.py +1 -18

python/sglang/srt/managers/detokenizer_manager.py +1 -1

No files found.
--- a/python/sglang/srt/entrypoints/context.py
+++ b/python/sglang/srt/entrypoints/context.py
 # SPDX-License-Identifier: Apache-2.0
-# Copied from vLLM
+# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -83,14 +83,6 @@ class HarmonyContext(ConversationContext):
        if isinstance(output, dict) and "output_ids" in output:
            output_token_ids = output["output_ids"]
-            # TODO: REMOVE here:
-            # Very hacky, find the first occurrence of token 200006 and cut from there
-            try:
-                start_index = output_token_ids.index(200006)
-                output_token_ids = output_token_ids[start_index:]
-            except ValueError:
-                pass
            for token_id in output_token_ids:
                self.parser.process(token_id)
            output_msgs = self.parser.messages
@@ -196,15 +188,6 @@ class StreamingHarmonyContext(HarmonyContext):
            # RequestOutput from SGLang with outputs
            output_token_ids = output["output_ids"]
-            # TODO: REMOVE here:
-            # Very hacky, find the first occurrence of token 200006 and cut from there
-            # Find the first occurrence of token 200006 and cut from there
-            try:
-                start_index = output_token_ids.index(200006)
-                output_token_ids = output_token_ids[start_index:]
-            except ValueError:
-                pass
            for token_id in output_token_ids:
                self.parser.process(token_id)

--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -216,7 +216,7 @@ class DetokenizerManager:
            rids=recv_obj.rids,
            finished_reasons=recv_obj.finished_reasons,
            output_strs=output_strs,
-            output_ids=recv_obj.decode_ids,
+            output_ids=recv_obj.output_ids,
            prompt_tokens=recv_obj.prompt_tokens,
            completion_tokens=recv_obj.completion_tokens,
            cached_tokens=recv_obj.cached_tokens,