[CI/Build] Reduce LoRA layer test cases (#23721)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

[CI/Build] Reduce LoRA layer test cases (#23721)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
e0394076 · Jee Jee Li · GitHub · 11eddf02 · e0394076
Unverified commit e0394076, authored Aug 27, 2025 by Jee Jee Li; committed by GitHub on Aug 27, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 33 additions and 39 deletions

tests/lora/test_layers.py tests/lora/test_layers.py +33 -39

No files found.
--- a/tests/lora/test_layers.py
+++ b/tests/lora/test_layers.py
@@ -243,7 +243,7 @@ def check_punica_wrapper(punica_wrapper) -> bool:
 @torch.inference_mode()
-@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
+@pytest.mark.parametrize("num_loras", [1, 2, 4])
 @pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
 @pytest.mark.parametrize("stage", STAGES)
@@ -347,7 +347,7 @@ def test_embeddings(dist_init, num_loras, device, vocab_size, stage) -> None:
 @torch.inference_mode()
 # @pytest.mark.skip(
 #     reason="Fails when loras are in any slot other than the first.")
-@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
+@pytest.mark.parametrize("num_loras", [1, 2, 4])
 @pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 128000])
 @pytest.mark.parametrize("stage", STAGES)
@@ -486,7 +486,7 @@ def test_embeddings_with_new_embeddings(dist_init, num_loras, device,
 @torch.inference_mode()
-@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
+@pytest.mark.parametrize("num_loras", [1, 2, 4])
 @pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("vocab_size", [512, 32000, 64000, 256512])
 @pytest.mark.parametrize("stage", STAGES)
@@ -620,12 +620,15 @@ def test_lm_head_logits_processor(dist_init, num_loras, device, vocab_size,
 @torch.inference_mode()
-@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
+@pytest.mark.parametrize("num_loras", [1, 2, 4])
 @pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("stage", STAGES)
-@pytest.mark.parametrize("bias_enabled", [True, False])
+def test_linear_replicated(
-def test_linear_replicated(dist_init, num_loras, device, stage,
+    dist_init,
-                           bias_enabled) -> None:
+    num_loras,
+    device,
+    stage,
+) -> None:
    if current_platform.is_cuda_alike():
        torch.cuda.set_device(device)
@@ -634,10 +637,11 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
    torch.set_default_device(device)
    punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
    assert check_punica_wrapper(punica_wrapper)
-    lora_config = LoRAConfig(max_loras=max_loras,
+    lora_config = LoRAConfig(
-                             max_lora_rank=8,
+        max_loras=max_loras,
-                             lora_dtype=torch.float16,
+        max_lora_rank=8,
-                             bias_enabled=bias_enabled)
+        lora_dtype=torch.float16,
+    )
    def create_random_linear_replicated_layer():
@@ -651,10 +655,6 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
        lora_linear.create_lora_weights(max_loras, lora_config)
        assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
            lora_linear.lora_b_stacked) == 1)
-        if bias_enabled:
-            assert len(lora_linear.lora_bias_stacked) == lora_linear.n_slices
-        else:
-            assert lora_linear.lora_bias_stacked is None
        return linear, lora_linear
    for i in range(NUM_RANDOM_SEEDS):
@@ -734,14 +734,13 @@ def test_linear_replicated(dist_init, num_loras, device, stage,
 @torch.inference_mode()
-@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
+@pytest.mark.parametrize("num_loras", [1, 2, 4])
 @pytest.mark.parametrize("orientation", ["row", "column"])
 @pytest.mark.parametrize("fully_shard", [True, False])
 @pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("stage", STAGES)
-@pytest.mark.parametrize("bias_enabled", [True, False])
 def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
-                         device, stage, bias_enabled) -> None:
+                         device, stage) -> None:
    if current_platform.is_cuda_alike():
        torch.cuda.set_device(device)
@@ -750,11 +749,12 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
    torch.set_default_device(device)
    punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
    assert check_punica_wrapper(punica_wrapper)
-    lora_config = LoRAConfig(max_loras=max_loras,
+    lora_config = LoRAConfig(
-                             max_lora_rank=8,
+        max_loras=max_loras,
-                             fully_sharded_loras=fully_shard,
+        max_lora_rank=8,
-                             lora_dtype=torch.float16,
+        fully_sharded_loras=fully_shard,
-                             bias_enabled=bias_enabled)
+        lora_dtype=torch.float16,
+    )
    def create_random_linear_parallel_layer():
        if orientation == "row":
@@ -777,10 +777,7 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
        lora_linear.create_lora_weights(max_loras, lora_config)
        assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
            lora_linear.lora_b_stacked) == 1)
-        if bias_enabled:
-            assert len(lora_linear.lora_bias_stacked) == lora_linear.n_slices
-        else:
-            assert lora_linear.lora_bias_stacked is None
        return linear, lora_linear
    for i in range(NUM_RANDOM_SEEDS):
@@ -860,14 +857,13 @@ def test_linear_parallel(dist_init, num_loras, orientation, fully_shard,
 @torch.inference_mode()
-@pytest.mark.parametrize("num_loras", [1, 2, 4, 8])
+@pytest.mark.parametrize("num_loras", [1, 2, 4])
 @pytest.mark.parametrize("repeats", [1, 2, 3])
 @pytest.mark.parametrize("fully_shard", [True, False])
 @pytest.mark.parametrize("device", DEVICES)
 @pytest.mark.parametrize("stage", STAGES)
-@pytest.mark.parametrize("bias_enabled", [True, False])
 def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
-                                device, stage, bias_enabled) -> None:
+                                device, stage) -> None:
    if current_platform.is_cuda_alike():
        torch.cuda.set_device(device)
@@ -876,11 +872,12 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
    torch.set_default_device(device)
    punica_wrapper = get_punica_wrapper(8192, 256, device, max_loras=max_loras)
    assert check_punica_wrapper(punica_wrapper)
-    lora_config = LoRAConfig(max_loras=max_loras,
+    lora_config = LoRAConfig(
-                             max_lora_rank=8,
+        max_loras=max_loras,
-                             fully_sharded_loras=fully_shard,
+        max_lora_rank=8,
-                             lora_dtype=torch.float16,
+        fully_sharded_loras=fully_shard,
-                             bias_enabled=bias_enabled)
+        lora_dtype=torch.float16,
+    )
    def create_column_parallel_packed_layer():
        if repeats == 2:
@@ -924,10 +921,7 @@ def test_column_parallel_packed(dist_init, num_loras, repeats, fully_shard,
                                        model_config=FakeConfig())
        assert (lora_linear.n_slices == len(lora_linear.lora_a_stacked) == len(
            lora_linear.lora_b_stacked) == n_slices)
-        if bias_enabled:
-            assert len(lora_linear.lora_bias_stacked) == lora_linear.n_slices
-        else:
-            assert lora_linear.lora_bias_stacked is None
        return linear, lora_linear
    for i in range(NUM_RANDOM_SEEDS):