Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e0394076
Unverified
Commit
e0394076
authored
Aug 27, 2025
by
Jee Jee Li
Committed by
GitHub
Aug 27, 2025
Browse files
[CI/Build] Reduce LoRA layer test cases (#23721)
Signed-off-by:
Jee Jee Li
<
pandaleefree@gmail.com
>
parent
11eddf02
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
39 deletions
+33
-39
tests/lora/test_layers.py
tests/lora/test_layers.py
+33
-39
No files found.
tests/lora/test_layers.py
View file @
e0394076
...
@@ -243,7 +243,7 @@ def check_punica_wrapper(punica_wrapper) -> bool:
...
@@ -243,7 +243,7 @@ def check_punica_wrapper(punica_wrapper) -> bool:
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
128000
])
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
128000
])
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
...
@@ -347,7 +347,7 @@ def test_embeddings(dist_init, num_loras, device, vocab_size, stage) -> None:
...
@@ -347,7 +347,7 @@ def test_embeddings(dist_init, num_loras, device, vocab_size, stage) -> None:
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
# @pytest.mark.skip(
# @pytest.mark.skip(
# reason="Fails when loras are in any slot other than the first.")
# reason="Fails when loras are in any slot other than the first.")
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
128000
])
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
128000
])
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
...
@@ -486,7 +486,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
...
@@ -486,7 +486,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
256512
])
@
pytest
.
mark
.
parametrize
(
"vocab_size"
,
[
512
,
32000
,
64000
,
256512
])
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
...
@@ -620,12 +620,15 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
...
@@ -620,12 +620,15 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"bias_enabled"
,
[
True
,
False
])
def
test_linear_replicated
(
def
test_linear_replicated
(
dist_init
,
num_loras
,
device
,
stage
,
dist_init
,
bias_enabled
)
->
None
:
num_loras
,
device
,
stage
,
)
->
None
:
if
current_platform
.
is_cuda_alike
():
if
current_platform
.
is_cuda_alike
():
torch
.
cuda
.
set_device
(
device
)
torch
.
cuda
.
set_device
(
device
)
...
@@ -634,10 +637,11 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
...
@@ -634,10 +637,11 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
torch
.
set_default_device
(
device
)
torch
.
set_default_device
(
device
)
punica_wrapper
=
get_punica_wrapper
(
8192
,
256
,
device
,
max_loras
=
max_loras
)
punica_wrapper
=
get_punica_wrapper
(
8192
,
256
,
device
,
max_loras
=
max_loras
)
assert
check_punica_wrapper
(
punica_wrapper
)
assert
check_punica_wrapper
(
punica_wrapper
)
lora_config
=
LoRAConfig
(
max_loras
=
max_loras
,
lora_config
=
LoRAConfig
(
max_lora_rank
=
8
,
max_loras
=
max_loras
,
lora_dtype
=
torch
.
float16
,
max_lora_rank
=
8
,
bias_enabled
=
bias_enabled
)
lora_dtype
=
torch
.
float16
,
)
def
create_random_linear_replicated_layer
():
def
create_random_linear_replicated_layer
():
...
@@ -651,10 +655,6 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
...
@@ -651,10 +655,6 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
lora_linear
.
create_lora_weights
(
max_loras
,
lora_config
)
lora_linear
.
create_lora_weights
(
max_loras
,
lora_config
)
assert
(
lora_linear
.
n_slices
==
len
(
lora_linear
.
lora_a_stacked
)
==
len
(
assert
(
lora_linear
.
n_slices
==
len
(
lora_linear
.
lora_a_stacked
)
==
len
(
lora_linear
.
lora_b_stacked
)
==
1
)
lora_linear
.
lora_b_stacked
)
==
1
)
if
bias_enabled
:
assert
len
(
lora_linear
.
lora_bias_stacked
)
==
lora_linear
.
n_slices
else
:
assert
lora_linear
.
lora_bias_stacked
is
None
return
linear
,
lora_linear
return
linear
,
lora_linear
for
i
in
range
(
NUM_RANDOM_SEEDS
):
for
i
in
range
(
NUM_RANDOM_SEEDS
):
...
@@ -734,14 +734,13 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
...
@@ -734,14 +734,13 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"orientation"
,
[
"row"
,
"column"
])
@
pytest
.
mark
.
parametrize
(
"orientation"
,
[
"row"
,
"column"
])
@
pytest
.
mark
.
parametrize
(
"fully_shard"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"fully_shard"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"bias_enabled"
,
[
True
,
False
])
def
test_linear_parallel
(
dist_init
,
num_loras
,
orientation
,
fully_shard
,
def
test_linear_parallel
(
dist_init
,
num_loras
,
orientation
,
fully_shard
,
device
,
stage
,
bias_enabled
)
->
None
:
device
,
stage
)
->
None
:
if
current_platform
.
is_cuda_alike
():
if
current_platform
.
is_cuda_alike
():
torch
.
cuda
.
set_device
(
device
)
torch
.
cuda
.
set_device
(
device
)
...
@@ -750,11 +749,12 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
...
@@ -750,11 +749,12 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
torch
.
set_default_device
(
device
)
torch
.
set_default_device
(
device
)
punica_wrapper
=
get_punica_wrapper
(
8192
,
256
,
device
,
max_loras
=
max_loras
)
punica_wrapper
=
get_punica_wrapper
(
8192
,
256
,
device
,
max_loras
=
max_loras
)
assert
check_punica_wrapper
(
punica_wrapper
)
assert
check_punica_wrapper
(
punica_wrapper
)
lora_config
=
LoRAConfig
(
max_loras
=
max_loras
,
lora_config
=
LoRAConfig
(
max_lora_rank
=
8
,
max_loras
=
max_loras
,
fully_sharded_loras
=
fully_shard
,
max_lora_rank
=
8
,
lora_dtype
=
torch
.
float16
,
fully_sharded_loras
=
fully_shard
,
bias_enabled
=
bias_enabled
)
lora_dtype
=
torch
.
float16
,
)
def
create_random_linear_parallel_layer
():
def
create_random_linear_parallel_layer
():
if
orientation
==
"row"
:
if
orientation
==
"row"
:
...
@@ -777,10 +777,7 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
...
@@ -777,10 +777,7 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
lora_linear
.
create_lora_weights
(
max_loras
,
lora_config
)
lora_linear
.
create_lora_weights
(
max_loras
,
lora_config
)
assert
(
lora_linear
.
n_slices
==
len
(
lora_linear
.
lora_a_stacked
)
==
len
(
assert
(
lora_linear
.
n_slices
==
len
(
lora_linear
.
lora_a_stacked
)
==
len
(
lora_linear
.
lora_b_stacked
)
==
1
)
lora_linear
.
lora_b_stacked
)
==
1
)
if
bias_enabled
:
assert
len
(
lora_linear
.
lora_bias_stacked
)
==
lora_linear
.
n_slices
else
:
assert
lora_linear
.
lora_bias_stacked
is
None
return
linear
,
lora_linear
return
linear
,
lora_linear
for
i
in
range
(
NUM_RANDOM_SEEDS
):
for
i
in
range
(
NUM_RANDOM_SEEDS
):
...
@@ -860,14 +857,13 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
...
@@ -860,14 +857,13 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
@
torch
.
inference_mode
()
@
torch
.
inference_mode
()
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
,
8
])
@
pytest
.
mark
.
parametrize
(
"num_loras"
,
[
1
,
2
,
4
])
@
pytest
.
mark
.
parametrize
(
"repeats"
,
[
1
,
2
,
3
])
@
pytest
.
mark
.
parametrize
(
"repeats"
,
[
1
,
2
,
3
])
@
pytest
.
mark
.
parametrize
(
"fully_shard"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"fully_shard"
,
[
True
,
False
])
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"device"
,
DEVICES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"stage"
,
STAGES
)
@
pytest
.
mark
.
parametrize
(
"bias_enabled"
,
[
True
,
False
])
def
test_column_parallel_packed
(
dist_init
,
num_loras
,
repeats
,
fully_shard
,
def
test_column_parallel_packed
(
dist_init
,
num_loras
,
repeats
,
fully_shard
,
device
,
stage
,
bias_enabled
)
->
None
:
device
,
stage
)
->
None
:
if
current_platform
.
is_cuda_alike
():
if
current_platform
.
is_cuda_alike
():
torch
.
cuda
.
set_device
(
device
)
torch
.
cuda
.
set_device
(
device
)
...
@@ -876,11 +872,12 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
...
@@ -876,11 +872,12 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
torch
.
set_default_device
(
device
)
torch
.
set_default_device
(
device
)
punica_wrapper
=
get_punica_wrapper
(
8192
,
256
,
device
,
max_loras
=
max_loras
)
punica_wrapper
=
get_punica_wrapper
(
8192
,
256
,
device
,
max_loras
=
max_loras
)
assert
check_punica_wrapper
(
punica_wrapper
)
assert
check_punica_wrapper
(
punica_wrapper
)
lora_config
=
LoRAConfig
(
max_loras
=
max_loras
,
lora_config
=
LoRAConfig
(
max_lora_rank
=
8
,
max_loras
=
max_loras
,
fully_sharded_loras
=
fully_shard
,
max_lora_rank
=
8
,
lora_dtype
=
torch
.
float16
,
fully_sharded_loras
=
fully_shard
,
bias_enabled
=
bias_enabled
)
lora_dtype
=
torch
.
float16
,
)
def
create_column_parallel_packed_layer
():
def
create_column_parallel_packed_layer
():
if
repeats
==
2
:
if
repeats
==
2
:
...
@@ -924,10 +921,7 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
...
@@ -924,10 +921,7 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
model_config
=
FakeConfig
())
model_config
=
FakeConfig
())
assert
(
lora_linear
.
n_slices
==
len
(
lora_linear
.
lora_a_stacked
)
==
len
(
assert
(
lora_linear
.
n_slices
==
len
(
lora_linear
.
lora_a_stacked
)
==
len
(
lora_linear
.
lora_b_stacked
)
==
n_slices
)
lora_linear
.
lora_b_stacked
)
==
n_slices
)
if
bias_enabled
:
assert
len
(
lora_linear
.
lora_bias_stacked
)
==
lora_linear
.
n_slices
else
:
assert
lora_linear
.
lora_bias_stacked
is
None
return
linear
,
lora_linear
return
linear
,
lora_linear
for
i
in
range
(
NUM_RANDOM_SEEDS
):
for
i
in
range
(
NUM_RANDOM_SEEDS
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment