Unverified commit 094fcce2, authored by Nick Hill, committed by GitHub
Browse files

[BugFix] Re-fix async multimodal cpu tensor race condition (#31373)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
Signed-off-by: njhill <nickhill123@gmail.com>
parent 573dd0e6
......@@ -3058,8 +3058,10 @@ class GPUModelRunner(
scheduler_output = deepcopy(scheduler_output)
num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
with record_function_or_nullcontext("gpu_model_runner: preprocess"):
with self.synchronize_input_prep():
with (
record_function_or_nullcontext("gpu_model_runner: preprocess"),
self.synchronize_input_prep(),
):
# Update persistent batch states.
self._update_states(scheduler_output)
......@@ -3087,9 +3089,8 @@ class GPUModelRunner(
if not has_kv_transfer_group():
# Return empty ModelRunnerOutput if no work to do.
return EMPTY_MODEL_RUNNER_OUTPUT
return self.kv_connector_no_forward(
scheduler_output, self.vllm_config
)
return self.kv_connector_no_forward(scheduler_output, self.vllm_config)
if self.cache_config.kv_sharing_fast_prefill:
assert not self.num_prompt_logprobs, (
"--kv-sharing-fast-prefill produces incorrect "
......@@ -3104,10 +3105,7 @@ class GPUModelRunner(
max_num_scheduled_tokens = int(num_scheduled_tokens_np.max())
num_tokens_unpadded = scheduler_output.total_num_scheduled_tokens
(
logits_indices,
spec_decode_metadata,
) = self._prepare_inputs(
logits_indices, spec_decode_metadata = self._prepare_inputs(
scheduler_output,
num_scheduled_tokens_np,
)
......@@ -3169,7 +3167,7 @@ class GPUModelRunner(
use_spec_decode = len(scheduler_output.scheduled_spec_decode_tokens) > 0
ubatch_slices_attn = ubatch_slices_padded if pad_attn else ubatch_slices
(attn_metadata, spec_decode_common_attn_metadata) = (
attn_metadata, spec_decode_common_attn_metadata = (
self._build_attention_metadata(
num_tokens=num_tokens_unpadded,
num_tokens_padded=num_tokens_padded if pad_attn else None,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment