[Model] Use context managers for encoder- and LM-only mode (#32605)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Model] Use context managers for encoder- and LM-only mode (#32605)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
4753f3bf · Cyrus Leung · GitHub · 6c01ffb8 · 4753f3bf
Unverified Commit 4753f3bf authored Jan 20, 2026 by Cyrus Leung Committed by GitHub Jan 20, 2026
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +6 -4

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -65,7 +65,6 @@ from vllm.model_executor.models.interfaces import (
    SupportsXDRoPE,
    is_mixture_of_experts,
    supports_eagle3,
-    supports_mm_encoder_only,
    supports_mrope,
    supports_multimodal_pruning,
    supports_transcription,
@@ -4271,7 +4270,8 @@ class GPUModelRunner(
            remove_lora: If False, dummy LoRAs are not destroyed after the run
            activate_lora: If False, dummy_run is performed without LoRAs.
        """
-        if supports_mm_encoder_only(self.model):
+        mm_config = self.vllm_config.model_config.multimodal_config
+        if mm_config and mm_config.mm_encoder_only:
            # The current dummy run only covers LM execution, so we can skip it.
            # A dummy run for the MM encoder may need to be added in the future.
            return torch.tensor([]), torch.tensor([])
@@ -4558,7 +4558,8 @@ class GPUModelRunner(
        # like `inf` or `nan`.
        # To avoid breaking the sampler, we use a random tensor here instead.
-        if supports_mm_encoder_only(self.model):
+        mm_config = self.vllm_config.model_config.multimodal_config
+        if mm_config and mm_config.mm_encoder_only:
            # MM encoder-only models do not need to run the sampler.
            return torch.tensor([])
@@ -4687,7 +4688,8 @@ class GPUModelRunner(
        self,
        hidden_states: torch.Tensor,
    ) -> PoolerOutput:
-        if supports_mm_encoder_only(self.model):
+        mm_config = self.vllm_config.model_config.multimodal_config
+        if mm_config and mm_config.mm_encoder_only:
            # MM encoder-only models do not need to run the pooler.
            return torch.tensor([])