[Core] Eliminate redundant is_encoder_decoder lookups (20-40us/step) (#29800)

Signed-off-by: Wushi Dong <dongws@meta.com>

[Core] Eliminate redundant is_encoder_decoder lookups (20-40us/step) (#29800)
Signed-off-by: Wushi Dong <dongws@meta.com>
0037b574 · Wushi Dong · GitHub · f5b0846b · 0037b574
Unverified Commit 0037b574 authored Dec 01, 2025 by Wushi Dong Committed by GitHub Dec 02, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 9 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +3 -9

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2439,16 +2439,13 @@ class GPUModelRunner(
    ]:
        num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens
        is_first_rank = get_pp_group().is_first_rank
+        is_encoder_decoder = self.model_config.is_encoder_decoder

        # _prepare_inputs may reorder the batch, so we must gather multi
        # modal outputs after that to ensure the correct order
        ec_connector_output = None

-        if (
-            self.supports_mm_inputs
-            and is_first_rank
-            and not self.model_config.is_encoder_decoder
-        ):
+        if self.supports_mm_inputs and is_first_rank and not is_encoder_decoder:
            # Run the multimodal encoder if any.
            with self.maybe_get_ec_connector_output(
                scheduler_output,
@@ -2526,10 +2523,7 @@ class GPUModelRunner(
                num_input_tokens, intermediate_tensors, True
            )

-        if (
-            self.model_config.is_encoder_decoder
-            and scheduler_output.scheduled_encoder_inputs
-        ):
+        if is_encoder_decoder and scheduler_output.scheduled_encoder_inputs:
            # Run the encoder, just like we do with other multimodal inputs.
            # For an encoder-decoder model, our processing here is a bit
            # simpler, because the outputs are just passed to the decoder.