Unverified commit 8d0aabdd, authored by Tianyu Guo and committed by GitHub
Browse files

Fix the order of _free_encoder_inputs (#38907)


Signed-off-by: Tianyu Guo <guoty9@mail2.sysu.edu.cn>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
parent 0f3ce4c7
......@@ -996,14 +996,6 @@ class Scheduler(SchedulerInterface):
request.use_structured_output and not request.is_prefill_chunk
)
# NOTE: _free_encoder_inputs relies on num_computed_tokens, which
# may be updated again in _update_from_output for speculative
# decoding. However, it is safe to call the method here because
# encoder inputs are always part of the prompt, not the output,
# and thus are unaffected by speculative decoding.
if request.has_encoder_inputs:
self._free_encoder_inputs(request)
# Clear the finished request IDs.
# NOTE: We shouldn't do self.finished_req_ids.clear() here because
# it will also affect the scheduler output.
......@@ -1389,6 +1381,10 @@ class Scheduler(SchedulerInterface):
request_id=req_id,
)
# Free encoder inputs only after the step has actually executed.
if request.has_encoder_inputs:
self._free_encoder_inputs(request)
stopped = False
new_logprobs = None
new_token_ids = generated_token_ids
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment