Unverified Commit dd487e55 authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

bugfix: Fix XGrammar backend to use model's EOS tokens for constrained generation (#8422)

parent bb81daef
...@@ -168,7 +168,10 @@ class BaseGrammarBackend: ...@@ -168,7 +168,10 @@ class BaseGrammarBackend:
def create_grammar_backend( def create_grammar_backend(
server_args: ServerArgs, tokenizer, vocab_size: int server_args: ServerArgs,
tokenizer,
vocab_size: int,
eos_token_ids: Optional[set] = None,
) -> Optional[BaseGrammarBackend]: ) -> Optional[BaseGrammarBackend]:
if server_args.grammar_backend == "outlines": if server_args.grammar_backend == "outlines":
from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend from sglang.srt.constrained.outlines_backend import OutlinesGrammarBackend
...@@ -180,7 +183,12 @@ def create_grammar_backend( ...@@ -180,7 +183,12 @@ def create_grammar_backend(
elif server_args.grammar_backend == "xgrammar": elif server_args.grammar_backend == "xgrammar":
from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend from sglang.srt.constrained.xgrammar_backend import XGrammarGrammarBackend
grammar_backend = XGrammarGrammarBackend(tokenizer, vocab_size=vocab_size) # Convert Set[int] to List[int] if needed
eos_list = list(eos_token_ids) if eos_token_ids else None
grammar_backend = XGrammarGrammarBackend(
tokenizer, vocab_size=vocab_size, model_eos_token_ids=eos_list
)
elif server_args.grammar_backend == "llguidance": elif server_args.grammar_backend == "llguidance":
from sglang.srt.constrained.llguidance_backend import GuidanceBackend from sglang.srt.constrained.llguidance_backend import GuidanceBackend
......
...@@ -150,14 +150,16 @@ class XGrammarGrammarBackend(BaseGrammarBackend): ...@@ -150,14 +150,16 @@ class XGrammarGrammarBackend(BaseGrammarBackend):
self, self,
tokenizer, tokenizer,
vocab_size: int, vocab_size: int,
model_eos_token_ids: Optional[List[int]] = None,
): ):
super().__init__() super().__init__()
if True: # Create TokenizerInfo with model's EOS tokens as the authoritative stop tokens
tokenizer_info = TokenizerInfo.from_huggingface( # This ensures consistency between what the model considers EOS and what XGrammar uses
tokenizer, vocab_size=vocab_size tokenizer_info = TokenizerInfo.from_huggingface(
) tokenizer, vocab_size=vocab_size, stop_token_ids=model_eos_token_ids
override_stop_tokens = None )
override_stop_tokens = None
self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info) self.grammar_compiler = GrammarCompiler(tokenizer_info=tokenizer_info)
self.vocab_size = vocab_size self.vocab_size = vocab_size
......
...@@ -458,7 +458,10 @@ class Scheduler( ...@@ -458,7 +458,10 @@ class Scheduler(
self.grammar_queue: List[Req] = [] self.grammar_queue: List[Req] = []
if not server_args.skip_tokenizer_init: if not server_args.skip_tokenizer_init:
self.grammar_backend = create_grammar_backend( self.grammar_backend = create_grammar_backend(
server_args, self.tokenizer, self.model_config.vocab_size server_args,
self.tokenizer,
self.model_config.vocab_size,
self.model_config.hf_eos_token_id,
) )
else: else:
self.grammar_backend = None self.grammar_backend = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment