"vscode:/vscode.git/clone" did not exist on "3908ec24879e1579c9e195e5b31bb53eb3af4dce"
Unverified commit 8d0aabdd, authored by Tianyu Guo, committed by GitHub
Browse files

Fix the order of _free_encoder_inputs (#38907)


Signed-off-by: Tianyu Guo <guoty9@mail2.sysu.edu.cn>
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
parent 0f3ce4c7
...@@ -996,14 +996,6 @@ class Scheduler(SchedulerInterface): ...@@ -996,14 +996,6 @@ class Scheduler(SchedulerInterface):
request.use_structured_output and not request.is_prefill_chunk request.use_structured_output and not request.is_prefill_chunk
) )
# NOTE: _free_encoder_inputs relies on num_computed_tokens, which
# may be updated again in _update_from_output for speculative
# decoding. However, it is safe to call the method here because
# encoder inputs are always part of the prompt, not the output,
# and thus are unaffected by speculative decoding.
if request.has_encoder_inputs:
self._free_encoder_inputs(request)
# Clear the finished request IDs. # Clear the finished request IDs.
# NOTE: We shouldn't do self.finished_req_ids.clear() here because # NOTE: We shouldn't do self.finished_req_ids.clear() here because
# it will also affect the scheduler output. # it will also affect the scheduler output.
...@@ -1389,6 +1381,10 @@ class Scheduler(SchedulerInterface): ...@@ -1389,6 +1381,10 @@ class Scheduler(SchedulerInterface):
request_id=req_id, request_id=req_id,
) )
# Free encoder inputs only after the step has actually executed.
if request.has_encoder_inputs:
self._free_encoder_inputs(request)
stopped = False stopped = False
new_logprobs = None new_logprobs = None
new_token_ids = generated_token_ids new_token_ids = generated_token_ids
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment