Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8c31f47c
Unverified
Commit
8c31f47c
authored
Mar 18, 2026
by
Jee Jee Li
Committed by
GitHub
Mar 18, 2026
Browse files
[LoRA] Make LoRA respect `language_model_only` (#37375)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
26180124
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
3 deletions
+14
-3
vllm/lora/model_manager.py
vllm/lora/model_manager.py
+13
-1
vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py
vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py
+1
-2
No files found.
vllm/lora/model_manager.py
View file @
8c31f47c
...
@@ -161,9 +161,9 @@ class LoRAModelManager:
...
@@ -161,9 +161,9 @@ class LoRAModelManager:
device
=
self
.
device
,
device
=
self
.
device
,
lora_config
=
self
.
lora_config
,
lora_config
=
self
.
lora_config
,
)
)
lm_prefix
=
self
.
mm_mapping
.
language_model
[
0
]
lm_prefix
=
self
.
mm_mapping
.
language_model
[
0
]
self
.
punica_wrapper_mapping
[
lm_prefix
]
=
llm_punica_wrapper
self
.
punica_wrapper_mapping
[
lm_prefix
]
=
llm_punica_wrapper
if
self
.
lora_config
.
enable_tower_connector_lora
:
if
self
.
lora_config
.
enable_tower_connector_lora
:
self
.
supports_tower_connector_lora
=
self
.
supports_mm
and
hasattr
(
self
.
supports_tower_connector_lora
=
self
.
supports_mm
and
hasattr
(
self
.
model
,
"get_num_mm_encoder_tokens"
self
.
model
,
"get_num_mm_encoder_tokens"
...
@@ -171,6 +171,18 @@ class LoRAModelManager:
...
@@ -171,6 +171,18 @@ class LoRAModelManager:
if
not
self
.
supports_tower_connector_lora
:
if
not
self
.
supports_tower_connector_lora
:
return
return
if
(
vllm_config
.
model_config
.
multimodal_config
and
vllm_config
.
model_config
.
multimodal_config
.
language_model_only
):
if
self
.
supports_tower_connector_lora
:
logger
.
warning
(
"Disabling `enable_tower_connector_lora` because the multimodal "
"model is configured to initialize the language model only."
)
self
.
supports_tower_connector_lora
=
False
return
logger
.
warning
(
logger
.
warning
(
"LoRA for the tower and connector of multimodal models is "
"LoRA for the tower and connector of multimodal models is "
"experimental and may contain bugs. Please report any related issues on "
"experimental and may contain bugs. Please report any related issues on "
...
...
vllm/lora/ops/triton_ops/fused_moe_lora_fp8_op.py
View file @
8c31f47c
...
@@ -10,11 +10,10 @@ from vllm.distributed import (
...
@@ -10,11 +10,10 @@ from vllm.distributed import (
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_gather
,
tensor_model_parallel_all_reduce
,
tensor_model_parallel_all_reduce
,
)
)
from
vllm.lora.ops.triton_ops.utils
import
supports_pdl
from
vllm.triton_utils
import
tl
,
triton
from
vllm.triton_utils
import
tl
,
triton
from
vllm.utils.torch_utils
import
direct_register_custom_op
from
vllm.utils.torch_utils
import
direct_register_custom_op
from
.utils
import
supports_pdl
@
triton
.
jit
@
triton
.
jit
def
_get_lora_id
(
def
_get_lora_id
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment