Unverified commit ad56e684, authored by Liangsheng Yin, committed by GitHub
Browse files

Fix stuck in `get_new_prefill_batch` (#948)

parent ffb15744
@@ -364,12 +364,13 @@ class ModelTpServer:
# Compute matched prefix length # Compute matched prefix length
for req in self.waiting_queue: for req in self.waiting_queue:
req.input_ids = req.origin_input_ids + req.output_ids req.input_ids = req.origin_input_ids + req.output_ids
try_match_ids = req.input_ids
if req.return_logprob:
try_match_ids = req.input_ids[: req.logprob_start_len]
# NOTE: the prefix_indices must always be aligned with last_node
prefix_indices, last_node = self.tree_cache.match_prefix( prefix_indices, last_node = self.tree_cache.match_prefix(
rid=req.rid, rid=req.rid, key=try_match_ids
key=req.input_ids,
) )
if req.return_logprob:
prefix_indices = prefix_indices[: req.logprob_start_len]
req.extend_input_len = len(req.input_ids) - len(prefix_indices) req.extend_input_len = len(req.input_ids) - len(prefix_indices)
req.prefix_indices = prefix_indices req.prefix_indices = prefix_indices
req.last_node = last_node req.last_node = last_node
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment