Unverified commit 4753f3bf, authored by Cyrus Leung and committed by GitHub
Browse files

[Model] Use context managers for encoder- and LM-only mode (#32605)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 6c01ffb8
...@@ -65,7 +65,6 @@ from vllm.model_executor.models.interfaces import ( ...@@ -65,7 +65,6 @@ from vllm.model_executor.models.interfaces import (
SupportsXDRoPE, SupportsXDRoPE,
is_mixture_of_experts, is_mixture_of_experts,
supports_eagle3, supports_eagle3,
supports_mm_encoder_only,
supports_mrope, supports_mrope,
supports_multimodal_pruning, supports_multimodal_pruning,
supports_transcription, supports_transcription,
...@@ -4271,7 +4270,8 @@ class GPUModelRunner( ...@@ -4271,7 +4270,8 @@ class GPUModelRunner(
remove_lora: If False, dummy LoRAs are not destroyed after the run remove_lora: If False, dummy LoRAs are not destroyed after the run
activate_lora: If False, dummy_run is performed without LoRAs. activate_lora: If False, dummy_run is performed without LoRAs.
""" """
if supports_mm_encoder_only(self.model): mm_config = self.vllm_config.model_config.multimodal_config
if mm_config and mm_config.mm_encoder_only:
# The current dummy run only covers LM execution, so we can skip it. # The current dummy run only covers LM execution, so we can skip it.
# mm encoder dummy run may need to add in the future. # mm encoder dummy run may need to add in the future.
return torch.tensor([]), torch.tensor([]) return torch.tensor([]), torch.tensor([])
...@@ -4558,7 +4558,8 @@ class GPUModelRunner( ...@@ -4558,7 +4558,8 @@ class GPUModelRunner(
# like `inf` or `nan`. # like `inf` or `nan`.
# To avoid breaking the sampler, we use a random tensor here instead. # To avoid breaking the sampler, we use a random tensor here instead.
if supports_mm_encoder_only(self.model): mm_config = self.vllm_config.model_config.multimodal_config
if mm_config and mm_config.mm_encoder_only:
# MM Encoder only model no need to run sampler. # MM Encoder only model no need to run sampler.
return torch.tensor([]) return torch.tensor([])
...@@ -4687,7 +4688,8 @@ class GPUModelRunner( ...@@ -4687,7 +4688,8 @@ class GPUModelRunner(
self, self,
hidden_states: torch.Tensor, hidden_states: torch.Tensor,
) -> PoolerOutput: ) -> PoolerOutput:
if supports_mm_encoder_only(self.model): mm_config = self.vllm_config.model_config.multimodal_config
if mm_config and mm_config.mm_encoder_only:
# MM Encoder only model not need to run pooler. # MM Encoder only model not need to run pooler.
return torch.tensor([]) return torch.tensor([])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment