"docs/source/api/vscode:/vscode.git/clone" did not exist on "d9c25521bcbdbcaa6d2927ce04df0eeb59bafa99"
Unverified commit f8ca2368, authored by Yineng Zhang, committed by GitHub
Browse files

fix: kimi k2 xgrammar crash (#8367)


Co-authored-by: cicirori <32845984+cicirori@users.noreply.github.com>
Co-authored-by: gongwei-130 <56567052+gongwei-130@users.noreply.github.com>
parent d8ee1564
...@@ -431,6 +431,7 @@ class Req: ...@@ -431,6 +431,7 @@ class Req:
bootstrap_port: Optional[int] = None, bootstrap_port: Optional[int] = None,
bootstrap_room: Optional[int] = None, bootstrap_room: Optional[int] = None,
data_parallel_rank: Optional[int] = None, data_parallel_rank: Optional[int] = None,
vocab_size: Optional[int] = None,
): ):
# Input and output info # Input and output info
self.rid = rid self.rid = rid
...@@ -480,6 +481,7 @@ class Req: ...@@ -480,6 +481,7 @@ class Req:
self.to_abort_message: str = None self.to_abort_message: str = None
self.stream = stream self.stream = stream
self.eos_token_ids = eos_token_ids self.eos_token_ids = eos_token_ids
self.vocab_size = vocab_size
# For incremental decoding # For incremental decoding
# ----- | --------- read_ids -------| # ----- | --------- read_ids -------|
...@@ -713,6 +715,14 @@ class Req: ...@@ -713,6 +715,14 @@ class Req:
self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id) self.finished_reason = FINISH_MATCHED_TOKEN(matched=last_token_id)
return return
if last_token_id > self.vocab_size or last_token_id < 0:
if self.sampling_params.stop_token_ids:
self.output_ids[-1] = next(iter(self.sampling_params.stop_token_ids))
if self.eos_token_ids:
self.output_ids[-1] = next(iter(self.eos_token_ids))
self.finished_reason = FINISH_MATCHED_STR(matched="NaN happened")
return
# Check stop strings # Check stop strings
if len(self.sampling_params.stop_strs) > 0: if len(self.sampling_params.stop_strs) > 0:
tail_str = self.tokenizer.decode( tail_str = self.tokenizer.decode(
......
...@@ -1129,6 +1129,7 @@ class Scheduler( ...@@ -1129,6 +1129,7 @@ class Scheduler(
bootstrap_port=recv_req.bootstrap_port, bootstrap_port=recv_req.bootstrap_port,
bootstrap_room=recv_req.bootstrap_room, bootstrap_room=recv_req.bootstrap_room,
data_parallel_rank=recv_req.data_parallel_rank, data_parallel_rank=recv_req.data_parallel_rank,
vocab_size=self.model_config.vocab_size,
) )
req.tokenizer = self.tokenizer req.tokenizer = self.tokenizer
...@@ -1395,8 +1396,10 @@ class Scheduler( ...@@ -1395,8 +1396,10 @@ class Scheduler(
logger.info(f) logger.info(f)
if self.enable_metrics: if self.enable_metrics:
cache_hit_rate = adder.log_hit_tokens / ( total_tokens = adder.log_input_tokens + adder.log_hit_tokens
adder.log_input_tokens + adder.log_hit_tokens
cache_hit_rate = (
adder.log_hit_tokens / total_tokens if total_tokens > 0 else 0.0
) )
self.stats.num_running_reqs = running_bs self.stats.num_running_reqs = running_bs
self.stats.num_used_tokens = num_used self.stats.num_used_tokens = num_used
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment