Revert "bugfix: Fix output_ids extraction in detokenizer_manager" (#9467)

6c855db8 · Xinyuan Tong · GitHub · 0f9318f7 · 6c855db8 · 6c855db8
Unverified Commit 6c855db8 authored Aug 22, 2025 by Xinyuan Tong Committed by GitHub Aug 21, 2025
Showing with 19 additions and 2 deletions

python/sglang/srt/entrypoints/context.py python/sglang/srt/entrypoints/context.py +18 -1

python/sglang/srt/managers/detokenizer_manager.py python/sglang/srt/managers/detokenizer_manager.py +1 -1

No files found.
--- a/python/sglang/srt/entrypoints/context.py
+++ b/python/sglang/srt/entrypoints/context.py
 # SPDX-License-Identifier: Apache-2.0
-# Copied from vLLM: https://github.com/zyongye/vllm/blob/6a70830065701b163e36a86fd331b41b5feac401/vllm/entrypoints/context.py
+# Copied from vLLM
 import json
 import logging
 from abc import ABC, abstractmethod
@@ -83,6 +83,14 @@ class HarmonyContext(ConversationContext):
        if isinstance(output, dict) and "output_ids" in output:
            output_token_ids = output["output_ids"]
+            # TODO: remove this workaround.
+            # Very hacky: keep only the tokens from the first occurrence of 200006 onward
+            try:
+                start_index = output_token_ids.index(200006)
+                output_token_ids = output_token_ids[start_index:]
+            except ValueError:
+                pass
            for token_id in output_token_ids:
                self.parser.process(token_id)
            output_msgs = self.parser.messages
@@ -190,6 +198,15 @@ class StreamingHarmonyContext(HarmonyContext):
            # RequestOutput from SGLang with outputs
            output_token_ids = output["output_ids"]
+            # TODO: remove this workaround.
+            # Very hacky: find the first occurrence of token 200006 and keep only
+            # the tokens from that position onward (earlier tokens are discarded).
+            try:
+                start_index = output_token_ids.index(200006)
+                output_token_ids = output_token_ids[start_index:]
+            except ValueError:
+                pass
            for token_id in output_token_ids:
                self.parser.process(token_id)

--- a/python/sglang/srt/managers/detokenizer_manager.py
+++ b/python/sglang/srt/managers/detokenizer_manager.py
@@ -216,7 +216,7 @@ class DetokenizerManager:
            rids=recv_obj.rids,
            finished_reasons=recv_obj.finished_reasons,
            output_strs=output_strs,
-            output_ids=recv_obj.output_ids,
+            output_ids=recv_obj.decode_ids,
            prompt_tokens=recv_obj.prompt_tokens,
            completion_tokens=recv_obj.completion_tokens,
            cached_tokens=recv_obj.cached_tokens,