"tests/vscode:/vscode.git/clone" did not exist on "f2ba58e568a91489c6bdf024cef64cab1a9e9f77"
Unverified commit f6e5023b, authored by Sergey Plotnikov, committed by GitHub
Browse files

fix: race condition in multi modal PD worker (#7679)


Signed-off-by: Sergey Plotnikov <sergey.plotnikov@intel.com>
Co-authored-by: ishandhanani <82981111+ishandhanani@users.noreply.github.com>
parent 9d54ee00
...@@ -280,9 +280,13 @@ class MultimodalPDWorkerHandler(BaseWorkerHandler[dict, dict]): ...@@ -280,9 +280,13 @@ class MultimodalPDWorkerHandler(BaseWorkerHandler[dict, dict]):
logger.debug( logger.debug(
f"length of expanded prompt ids: {len(response.prompt_token_ids)}" f"length of expanded prompt ids: {len(response.prompt_token_ids)}"
) )
yield self._format_engine_output(response, num_output_tokens_so_far) chunk = self._format_engine_output(response, num_output_tokens_so_far)
# Capture token count BEFORE yield — vLLM may mutate
# response.outputs[0].token_ids in-place while we're suspended.
if response.outputs: if response.outputs:
num_output_tokens_so_far = len(response.outputs[0].token_ids) num_output_tokens_so_far = len(response.outputs[0].token_ids)
yield chunk
finally: finally:
if first_token: if first_token:
if rng_ttft is not None: if rng_ttft is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment