[Bugfix] Further clean up LoRA test (#14422)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

[Bugfix] Further clean up LoRA test (#14422)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
12c29a88 · Jee Jee Li · GitHub · 70da0c07 · 12c29a88 · 12c29a88
Unverified commit 12c29a88, authored Mar 07, 2025 by Jee Jee Li; committed by GitHub on Mar 07, 2025.
Showing with 2 additions and 48 deletions

tests/lora/conftest.py tests/lora/conftest.py +0 -5

tests/lora/test_mixtral.py tests/lora/test_mixtral.py +0 -42

tests/lora/test_quant_model.py tests/lora/test_quant_model.py +2 -1

No files found.
--- a/tests/lora/conftest.py
+++ b/tests/lora/conftest.py
@@ -185,11 +185,6 @@ def mixtral_lora_files():
    return snapshot_download(repo_id="SangBinCho/mixtral-lora")
-@pytest.fixture(scope="session")
-def mixtral_lora_files_all_target_modules():
-    return snapshot_download(repo_id="dyang415/mixtral-lora-v0")
 @pytest.fixture(scope="session")
 def gemma_lora_files():
    return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")

--- a/tests/lora/test_mixtral.py
+++ b/tests/lora/test_mixtral.py
@@ -69,45 +69,3 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
                     prompts=prompts) == expected_lora_output
    assert do_sample(llm, mixtral_lora_files, lora_id=2,
                     prompts=prompts) == expected_lora_output
-@pytest.mark.parametrize("tp_size", [4])
-@pytest.mark.parametrize("fully_shard", [True, False])
-def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
-                                         tp_size, fully_shard):
-    """This LoRA model has all supported Mixtral target modules"""
-    if torch.cuda.device_count() < tp_size:
-        pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
-    prompts = [
-        "Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:",  # noqa: E501
-        "Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:",  # noqa: E501
-        "Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:",  # noqa: E501
-    ]
-    llm = vllm.LLM(
-        MODEL_PATH,
-        enable_lora=True,
-        max_num_seqs=16,
-        max_loras=4,
-        distributed_executor_backend="ray",
-        tensor_parallel_size=tp_size,
-        fully_sharded_loras=fully_shard,
-        max_lora_rank=32,
-    )
-    expected_lora_output = [
-        "A: Nothing happens if you touch the eyes of a blind man.",
-        "A: add heat",
-        "1: Craig",
-    ]
-    assert do_sample(llm,
-                     mixtral_lora_files_all_target_modules,
-                     lora_id=1,
-                     prompts=prompts) == expected_lora_output
-    assert do_sample(llm,
-                     mixtral_lora_files_all_target_modules,
-                     lora_id=2,
-                     prompts=prompts) == expected_lora_output
--- a/tests/lora/test_quant_model.py
+++ b/tests/lora/test_quant_model.py
@@ -178,7 +178,8 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
                                 model):
    if num_gpus_available < 2:
        pytest.skip(f"Not enough GPUs for tensor parallelism {2}")
+    if model.quantization == "GPTQ":
+        pytest.skip("GPTQ lora outputs are just incredibly unstable")
    llm_tp1 = vllm.LLM(
        model=model.model_path,
        enable_lora=True,