Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9118217f
"vscode:/vscode.git/clone" did not exist on "a7d5b016bd0a882767dc1c3cc1537dc8c93a2ea7"
Unverified
Commit
9118217f
authored
Aug 06, 2024
by
Jee Jee Li
Committed by
GitHub
Aug 06, 2024
Browse files
[LoRA] Relax LoRA condition (#7146)
parent
e3c664bf
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
8 additions
and
7 deletions
+8
-7
tests/lora/test_layers.py
tests/lora/test_layers.py
+1
-1
tests/lora/test_punica_variation.py
tests/lora/test_punica_variation.py
+1
-1
vllm/config.py
vllm/config.py
+3
-2
vllm/lora/layers.py
vllm/lora/layers.py
+3
-3
No files found.
tests/lora/test_layers.py
View file @
9118217f
...
@@ -420,7 +420,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
...
@@ -420,7 +420,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
CUDA_DEVICES
)
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
128000
])
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
256512
])
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
def
test_lm_head_logits_processor
(
dist_init
,
num_loras
,
device
,
vocab_size
,
def
test_lm_head_logits_processor
(
dist_init
,
num_loras
,
device
,
vocab_size
,
stage
)
->
None
:
stage
)
->
None
:
...
...
tests/lora/test_punica_variation.py
View file @
9118217f
...
@@ -25,7 +25,7 @@ HIDDEN_SIZES = [3424, 4096, 4097]
...
@@ -25,7 +25,7 @@ HIDDEN_SIZES = [3424, 4096, 4097]
BATCHES
=
[
1
,
4
,
16
,
32
]
BATCHES
=
[
1
,
4
,
16
,
32
]
NUM_LORA
=
[
1
,
4
,
8
,
16
,
32
,
64
,
128
]
NUM_LORA
=
[
1
,
4
,
8
,
16
,
32
,
64
,
128
]
DTYPES
=
[
torch
.
float16
,
torch
.
bfloat16
]
DTYPES
=
[
torch
.
float16
,
torch
.
bfloat16
]
MAX_RANKS
=
[
1
,
4
,
8
,
16
,
32
,
64
,
128
]
MAX_RANKS
=
[
1
,
4
,
8
,
16
,
32
,
64
,
128
,
256
]
SCALES
=
[
0.5
]
SCALES
=
[
0.5
]
SEED
=
[
0
]
SEED
=
[
0
]
CUDA_DEVICES
=
[
f
"cuda:
{
0
}
"
]
CUDA_DEVICES
=
[
f
"cuda:
{
0
}
"
]
...
...
vllm/config.py
View file @
9118217f
...
@@ -1311,8 +1311,9 @@ class LoRAConfig:
...
@@ -1311,8 +1311,9 @@ class LoRAConfig:
long_lora_scaling_factors
:
Optional
[
Tuple
[
float
]]
=
None
long_lora_scaling_factors
:
Optional
[
Tuple
[
float
]]
=
None
def
__post_init__
(
self
):
def
__post_init__
(
self
):
# TODO: Increase the range of rank
# Setting the maximum rank to 256 should be able to satisfy the vast
possible_max_ranks
=
(
8
,
16
,
32
,
64
)
# majority of applications.
possible_max_ranks
=
(
8
,
16
,
32
,
64
,
128
,
256
)
possible_lora_extra_vocab_size
=
(
0
,
256
,
512
)
possible_lora_extra_vocab_size
=
(
0
,
256
,
512
)
if
self
.
max_lora_rank
not
in
possible_max_ranks
:
if
self
.
max_lora_rank
not
in
possible_max_ranks
:
raise
ValueError
(
raise
ValueError
(
...
...
vllm/lora/layers.py
View file @
9118217f
...
@@ -1073,10 +1073,10 @@ class LogitsProcessorWithLoRA(BaseLayerWithLoRA):
...
@@ -1073,10 +1073,10 @@ class LogitsProcessorWithLoRA(BaseLayerWithLoRA):
lora_config
:
LoRAConfig
,
lora_config
:
LoRAConfig
,
model_config
:
Optional
[
PretrainedConfig
]
=
None
,
model_config
:
Optional
[
PretrainedConfig
]
=
None
,
)
->
None
:
)
->
None
:
# TODO: Verify if this condition can be relaxed
# TODO: Verify if this condition can be
further
relaxed
if
32000
<
self
.
base_layer
.
vocab_size
>
128512
:
if
32000
<
self
.
base_layer
.
vocab_size
>
257024
:
raise
ValueError
(
"When using LoRA, vocab size must be "
raise
ValueError
(
"When using LoRA, vocab size must be "
"32000 >= vocab_size <=
128512
"
)
"32000 >= vocab_size <=
257024
"
)
self
.
lora_a_stacked
=
torch
.
zeros
(
self
.
lora_a_stacked
=
torch
.
zeros
(
(
(
max_loras
,
max_loras
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment