[CI/Build] Cleanup LoRA test (#26752)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

[CI/Build] Cleanup LoRA test (#26752)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
fdd32750 · Jee Jee Li · GitHub · c715ba37 · fdd32750 · fdd32750
Unverified commit fdd32750, authored Oct 14, 2025 by Jee Jee Li; committed by GitHub on Oct 14, 2025.
Showing 3 changed files with 3 additions and 11 deletions

tests/lora/test_chatglm3_tp.py +0 -5

tests/lora/test_llama_tp.py +0 -3

tests/lora/test_minicpmv_tp.py +3 -3

No files found.
--- a/tests/lora/test_chatglm3_tp.py
+++ b/tests/lora/test_chatglm3_tp.py
@@ -58,7 +58,6 @@ def test_chatglm3_lora(chatglm3_lora_files):
        max_loras=4,
        max_lora_rank=64,
        trust_remote_code=True,
-        enable_chunked_prefill=True,
    )

    output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
@@ -70,7 +69,6 @@ def test_chatglm3_lora(chatglm3_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4(chatglm3_lora_files):
    llm = vllm.LLM(
        MODEL_PATH,
@@ -81,7 +79,6 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):
        tensor_parallel_size=4,
        trust_remote_code=True,
        fully_sharded_loras=False,
-        enable_chunked_prefill=True,
    )

    output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)
@@ -93,7 +90,6 @@ def test_chatglm3_lora_tp4(chatglm3_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
    # https://github.com/NVIDIA/nccl/issues/1790, set a lower value for
    # gpu_memory_utilization here because NCCL >= 2.26.3 seems to use
@@ -107,7 +103,6 @@ def test_chatglm3_lora_tp4_fully_sharded_loras(chatglm3_lora_files):
        tensor_parallel_size=4,
        trust_remote_code=True,
        fully_sharded_loras=True,
-        enable_chunked_prefill=True,
        gpu_memory_utilization=0.85,
    )
    output1 = do_sample(llm, chatglm3_lora_files, lora_id=1)

--- a/tests/lora/test_llama_tp.py
+++ b/tests/lora/test_llama_tp.py
@@ -113,7 +113,6 @@ def test_llama_lora(sql_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_llama_lora_tp4(sql_lora_files):
    llm = vllm.LLM(
        MODEL_PATH,
@@ -127,7 +126,6 @@ def test_llama_lora_tp4(sql_lora_files):


 @multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
 def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
    llm = vllm.LLM(
        MODEL_PATH,
@@ -142,7 +140,6 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):


 @multi_gpu_test(num_gpus=2)
-@create_new_process_for_each_test()
 def test_tp2_serialize_and_deserialize_lora(
    tmp_path, sql_lora_files, sql_lora_huggingface_id
 ):

--- a/tests/lora/test_minicpmv_tp.py
+++ b/tests/lora/test_minicpmv_tp.py
@@ -8,7 +8,7 @@ from vllm.assets.image import ImageAsset
 from vllm.lora.request import LoRARequest
 from vllm.platforms import current_platform

-from ..utils import create_new_process_for_each_test
+from ..utils import multi_gpu_test

 MODEL_PATH = "openbmb/MiniCPM-Llama3-V-2_5"

@@ -88,7 +88,7 @@ def test_minicpmv_lora(minicpmv_lora_files):
    current_platform.is_rocm(),
    reason="MiniCPM-V dependency xformers incompatible with ROCm",
 )
-@create_new_process_for_each_test()
+@multi_gpu_test(num_gpus=4)
 def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
    llm = vllm.LLM(
        MODEL_PATH,
@@ -112,7 +112,7 @@ def test_minicpmv_tp4_wo_fully_sharded_loras(minicpmv_lora_files):
    current_platform.is_rocm(),
    reason="MiniCPM-V dependency xformers incompatible with ROCm",
 )
-@create_new_process_for_each_test()
+@multi_gpu_test(num_gpus=4)
 def test_minicpmv_tp4_fully_sharded_loras(minicpmv_lora_files):
    llm = vllm.LLM(
        MODEL_PATH,