Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4753f3bf
Unverified
Commit
4753f3bf
authored
Jan 20, 2026
by
Cyrus Leung
Committed by
GitHub
Jan 20, 2026
Browse files
[Model] Use context managers for encoder- and LM-only mode (#32605)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
6c01ffb8
Changes
21
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
4 deletions
+6
-4
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+6
-4
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
4753f3bf
...
@@ -65,7 +65,6 @@ from vllm.model_executor.models.interfaces import (
...
@@ -65,7 +65,6 @@ from vllm.model_executor.models.interfaces import (
SupportsXDRoPE
,
SupportsXDRoPE
,
is_mixture_of_experts
,
is_mixture_of_experts
,
supports_eagle3
,
supports_eagle3
,
supports_mm_encoder_only
,
supports_mrope
,
supports_mrope
,
supports_multimodal_pruning
,
supports_multimodal_pruning
,
supports_transcription
,
supports_transcription
,
...
@@ -4271,7 +4270,8 @@ class GPUModelRunner(
...
@@ -4271,7 +4270,8 @@ class GPUModelRunner(
remove_lora: If False, dummy LoRAs are not destroyed after the run
remove_lora: If False, dummy LoRAs are not destroyed after the run
activate_lora: If False, dummy_run is performed without LoRAs.
activate_lora: If False, dummy_run is performed without LoRAs.
"""
"""
if
supports_mm_encoder_only
(
self
.
model
):
mm_config
=
self
.
vllm_config
.
model_config
.
multimodal_config
if
mm_config
and
mm_config
.
mm_encoder_only
:
# The current dummy run only covers LM execution, so we can skip it.
# The current dummy run only covers LM execution, so we can skip it.
# mm encoder dummy run may need to add in the future.
# mm encoder dummy run may need to add in the future.
return
torch
.
tensor
([]),
torch
.
tensor
([])
return
torch
.
tensor
([]),
torch
.
tensor
([])
...
@@ -4558,7 +4558,8 @@ class GPUModelRunner(
...
@@ -4558,7 +4558,8 @@ class GPUModelRunner(
# like `inf` or `nan`.
# like `inf` or `nan`.
# To avoid breaking the sampler, we use a random tensor here instead.
# To avoid breaking the sampler, we use a random tensor here instead.
if
supports_mm_encoder_only
(
self
.
model
):
mm_config
=
self
.
vllm_config
.
model_config
.
multimodal_config
if
mm_config
and
mm_config
.
mm_encoder_only
:
# MM Encoder only model no need to run sampler.
# MM Encoder only model no need to run sampler.
return
torch
.
tensor
([])
return
torch
.
tensor
([])
...
@@ -4687,7 +4688,8 @@ class GPUModelRunner(
...
@@ -4687,7 +4688,8 @@ class GPUModelRunner(
self
,
self
,
hidden_states
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
)
->
PoolerOutput
:
)
->
PoolerOutput
:
if
supports_mm_encoder_only
(
self
.
model
):
mm_config
=
self
.
vllm_config
.
model_config
.
multimodal_config
if
mm_config
and
mm_config
.
mm_encoder_only
:
# MM Encoder only model not need to run pooler.
# MM Encoder only model not need to run pooler.
return
torch
.
tensor
([])
return
torch
.
tensor
([])
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment