Unverified commit 22e00eeb, authored by Shangming Cai, committed by GitHub
Browse files

[Bugfix] Prevent PD server crash from invalid grammar (#8062)


Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
parent b3eac168
from __future__ import annotations from __future__ import annotations
import logging import logging
from http import HTTPStatus
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
import torch import torch
from sglang.srt.disaggregation.utils import prepare_abort
from sglang.srt.model_executor.forward_batch_info import CaptureHiddenMode, ForwardMode from sglang.srt.model_executor.forward_batch_info import CaptureHiddenMode, ForwardMode
from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
...@@ -102,7 +104,17 @@ class ScheduleBatchDisaggregationDecodeMixin: ...@@ -102,7 +104,17 @@ class ScheduleBatchDisaggregationDecodeMixin:
self.output_ids.append(req.output_ids[-1]) self.output_ids.append(req.output_ids[-1])
self.tree_cache.cache_unfinished_req(req) self.tree_cache.cache_unfinished_req(req)
if req.grammar is not None: if req.grammar is not None:
req.grammar.accept_token(req.output_ids[-1]) # FIXME: this try-except block is for handling unexpected xgrammar issue.
try:
req.grammar.accept_token(req.output_ids[-1])
except ValueError as e:
# Grammar accept_token can raise ValueError if the token is not in the grammar.
# This can happen if the grammar is not set correctly or the token is invalid.
error_message = f"Grammar accept_token failed for req {req.rid} with token {req.output_ids[-1]}: {e}"
self.tree_cache.cache_finished_req(req)
prepare_abort(
req, error_message, status_code=HTTPStatus.INTERNAL_SERVER_ERROR
)
req.grammar.finished = req.finished() req.grammar.finished = req.finished()
self.output_ids = torch.tensor(self.output_ids, device=self.device) self.output_ids = torch.tensor(self.output_ids, device=self.device)
......
...@@ -425,7 +425,19 @@ class SchedulerDisaggregationPrefillMixin: ...@@ -425,7 +425,19 @@ class SchedulerDisaggregationPrefillMixin:
self.send_kv_chunk(req, last_chunk=True) self.send_kv_chunk(req, last_chunk=True)
if req.grammar is not None: if req.grammar is not None:
req.grammar.accept_token(next_token_id) # FIXME: this try-except block is for handling unexpected xgrammar issue.
try:
req.grammar.accept_token(next_token_id)
except ValueError as e:
# Grammar accept_token can raise ValueError if the token is not in the grammar.
# This can happen if the grammar is not set correctly or the token is invalid.
error_message = f"Grammar accept_token failed for req {req.rid} with token {next_token_id}: {e}"
self.tree_cache.cache_finished_req(req)
prepare_abort(
req,
error_message,
status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
)
req.grammar.finished = req.finished() req.grammar.finished = req.finished()
else: else:
# being chunked reqs' prefill is not finished # being chunked reqs' prefill is not finished
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment