Fix missing additional_stop_token_ids (#1769)

0d800090 · Lianmin Zheng · GitHub · b7d05594 · 0d800090 · 0d800090
Unverified commit 0d800090, authored Oct 23, 2024 by Lianmin Zheng; committed by GitHub on Oct 23, 2024.
3 changed files
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -9,5 +9,5 @@
 /python/sglang/srt/models @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
 /python/sglang/srt/openai_api @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
 /python/sglang/srt/sampling @merrymercy @hnyls2002
-/test/lang @merrymercy @Ying1123 @hnyls2002 @ByronHsu
-/test/srt @merrymercy @Ying1123 @hnyls2002 @zhyncs @ispobock @ByronHsu
+/test/lang @merrymercy @Ying1123 @ByronHsu
+/test/srt @merrymercy @Ying1123 @zhyncs
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -164,14 +164,7 @@ def get_tokenizer(
            "slowdown. Consider using a fast tokenizer instead."
        )

-    # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
-    if "<|eom_id|>" in tokenizer.get_added_vocab():
-        tokenizer.additional_stop_token_ids = set(
-            [tokenizer.get_added_vocab()["<|eom_id|>"]]
-        )
-    else:
-        tokenizer.additional_stop_token_ids = None
-
+    handle_additional_stop_token_ids(tokenizer)
    return tokenizer


@@ -190,4 +183,16 @@ def get_processor(
        tokenizer_revision=tokenizer_revision,
        **kwargs,
    )
+
+    handle_additional_stop_token_ids(processor.tokenizer)
    return processor
+
+
+def handle_additional_stop_token_ids(tokenizer):
+    # Special handling for stop token <|eom_id|> generated by llama 3 tool use.
+    if "<|eom_id|>" in tokenizer.get_added_vocab():
+        tokenizer.additional_stop_token_ids = set(
+            [tokenizer.get_added_vocab()["<|eom_id|>"]]
+        )
+    else:
+        tokenizer.additional_stop_token_ids = None
--- a/python/sglang/srt/sampling/sampling_params.py
+++ b/python/sglang/srt/sampling/sampling_params.py
@@ -135,7 +135,7 @@ class SamplingParams:
            self.stop_str_max_len = stop_str_max_len

        # Process stop token ids
-        if tokenizer.additional_stop_token_ids:
+        if tokenizer and tokenizer.additional_stop_token_ids:
            self.stop_token_ids.update(tokenizer.additional_stop_token_ids)

    def to_srt_kwargs(self):