Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f1d1bf62
Unverified
Commit
f1d1bf62
authored
Dec 22, 2024
by
Jason T. Greene
Committed by
GitHub
Dec 22, 2024
Browse files
[Bugfix] Fix fully sharded LoRAs with Mixtral (#11390)
Signed-off-by:
Jason Greene
<
jason.greene@redhat.com
>
parent
72d9c316
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
2 deletions
+5
-2
tests/lora/test_mixtral.py
tests/lora/test_mixtral.py
+3
-1
vllm/lora/layers.py
vllm/lora/layers.py
+2
-1
No files found.
tests/lora/test_mixtral.py
View file @
f1d1bf62
...
...
@@ -62,8 +62,9 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
4
])
@
pytest
.
mark
.
parametrize
(
"fully_shard"
,
[
True
,
False
])
def
test_mixtral_lora_all_target_modules
(
mixtral_lora_files_all_target_modules
,
tp_size
):
tp_size
,
fully_shard
):
"""This LoRA model has all supported Mixtral target modules"""
if
torch
.
cuda
.
device_count
()
<
tp_size
:
...
...
@@ -82,6 +83,7 @@ def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
max_loras
=
4
,
distributed_executor_backend
=
"ray"
,
tensor_parallel_size
=
tp_size
,
fully_sharded_loras
=
fully_shard
,
max_lora_rank
=
32
,
)
...
...
vllm/lora/layers.py
View file @
f1d1bf62
...
...
@@ -425,8 +425,9 @@ class ReplicatedLinearWithLoRA(BaseLinearLayerWithLoRA):
if
self
.
base_layer
.
skip_bias_add
else
None
)
return
output
,
output_bias
# ReplicatedLinear should always be replaced, regardless of the fully
# sharded LoRAs setting, because it is, by definition, copied per GPU.
@
classmethod
@
_not_fully_sharded_can_replace
def
can_replace_layer
(
cls
,
source_layer
:
nn
.
Module
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment