Commit f6e5023b (unverified), authored by Sergey Plotnikov, committed by GitHub
Browse files

fix: race condition in multi modal PD worker (#7679)


Signed-off-by: Sergey Plotnikov <sergey.plotnikov@intel.com>
Co-authored-by: ishandhanani <82981111+ishandhanani@users.noreply.github.com>
parent 9d54ee00
@@ -280,9 +280,13 @@ class MultimodalPDWorkerHandler(BaseWorkerHandler[dict, dict]):
logger.debug(
f"length of expanded prompt ids: {len(response.prompt_token_ids)}"
)
yield self._format_engine_output(response, num_output_tokens_so_far)
chunk = self._format_engine_output(response, num_output_tokens_so_far)
# Capture token count BEFORE yield — vLLM may mutate
# response.outputs[0].token_ids in-place while we're suspended.
if response.outputs:
num_output_tokens_so_far = len(response.outputs[0].token_ids)
yield chunk
finally:
if first_token:
if rng_ttft is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment