Unverified Commit 4203926f authored by Jee Jee Li's avatar Jee Jee Li Committed by GitHub
Browse files

[CI/Build] Further clean up LoRA tests (#15920)


Signed-off-by: default avatarJee Jee Li <pandaleefree@gmail.com>
parent cdb57015
...@@ -289,7 +289,7 @@ steps: ...@@ -289,7 +289,7 @@ steps:
source_file_dependencies: source_file_dependencies:
- vllm/lora - vllm/lora
- tests/lora - tests/lora
command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
parallelism: 4 parallelism: 4
- label: PyTorch Fullgraph Smoke Test # 9min - label: PyTorch Fullgraph Smoke Test # 9min
...@@ -602,8 +602,6 @@ steps: ...@@ -602,8 +602,6 @@ steps:
# requires multi-GPU testing for validation. # requires multi-GPU testing for validation.
- pytest -v -s -x lora/test_chatglm3_tp.py - pytest -v -s -x lora/test_chatglm3_tp.py
- pytest -v -s -x lora/test_llama_tp.py - pytest -v -s -x lora/test_llama_tp.py
- pytest -v -s -x lora/test_minicpmv_tp.py
- pytest -v -s -x lora/test_transfomers_model.py
- label: Weight Loading Multiple GPU Test # 33min - label: Weight Loading Multiple GPU Test # 33min
......
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
import tempfile import tempfile
from collections import OrderedDict from collections import OrderedDict
from typing import TypedDict
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest import pytest
...@@ -26,28 +25,6 @@ from vllm.model_executor.models.interfaces import SupportsLoRA ...@@ -26,28 +25,6 @@ from vllm.model_executor.models.interfaces import SupportsLoRA
from vllm.platforms import current_platform from vllm.platforms import current_platform
class ContextIDInfo(TypedDict):
lora_id: int
context_length: str
class ContextInfo(TypedDict):
lora: str
context_length: str
LONG_LORA_INFOS: list[ContextIDInfo] = [{
"lora_id": 1,
"context_length": "16k",
}, {
"lora_id": 2,
"context_length": "16k",
}, {
"lora_id": 3,
"context_length": "32k",
}]
@pytest.fixture() @pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool: def should_do_global_cleanup_after_test(request) -> bool:
"""Allow subdirectories to skip global cleanup by overriding this fixture. """Allow subdirectories to skip global cleanup by overriding this fixture.
......
...@@ -59,7 +59,7 @@ DEVICES = ([ ...@@ -59,7 +59,7 @@ DEVICES = ([
# prefill stage(True) or decode stage(False) # prefill stage(True) or decode stage(False)
STAGES = [True, False] STAGES = [True, False]
NUM_RANDOM_SEEDS = 10 NUM_RANDOM_SEEDS = 6
VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS = 128 VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS = 128
......
...@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files): ...@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
enable_chunked_prefill=True, enable_chunked_prefill=True,
) )
generate_and_test(llm, sql_lora_files) generate_and_test(llm, sql_lora_files)
@multi_gpu_test(num_gpus=4)
@create_new_process_for_each_test()
def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):
llm = vllm.LLM(
MODEL_PATH,
enable_lora=True,
max_num_seqs=16,
max_loras=4,
tensor_parallel_size=4,
fully_sharded_loras=True,
enable_lora_bias=True,
enable_chunked_prefill=True,
)
generate_and_test(llm, sql_lora_files)
...@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]: ...@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
@pytest.mark.xfail( @pytest.mark.xfail(
current_platform.is_rocm(), current_platform.is_rocm(),
reason="MiniCPM-V dependency xformers incompatible with ROCm") reason="MiniCPM-V dependency xformers incompatible with ROCm")
@create_new_process_for_each_test()
def test_minicpmv_lora(minicpmv_lora_files): def test_minicpmv_lora(minicpmv_lora_files):
llm = vllm.LLM( llm = vllm.LLM(
MODEL_PATH, MODEL_PATH,
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import pytest
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.platforms import current_platform
from ..utils import create_new_process_for_each_test, multi_gpu_test from ..utils import create_new_process_for_each_test, multi_gpu_test
...@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]: ...@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
return generated_texts return generated_texts
@create_new_process_for_each_test()
def test_ilama_lora(ilama_lora_files): def test_ilama_lora(ilama_lora_files):
llm = vllm.LLM(MODEL_PATH, llm = vllm.LLM(MODEL_PATH,
max_model_len=1024, max_model_len=1024,
...@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files): ...@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
assert output2[i] == EXPECTED_LORA_OUTPUT[i] assert output2[i] == EXPECTED_LORA_OUTPUT[i]
@pytest.mark.skipif(current_platform.is_cuda_alike(),
reason="Skipping to avoid redundant model tests")
@multi_gpu_test(num_gpus=4) @multi_gpu_test(num_gpus=4)
@create_new_process_for_each_test() @create_new_process_for_each_test()
def test_ilama_lora_tp4(ilama_lora_files): def test_ilama_lora_tp4(ilama_lora_files):
...@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files): ...@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
assert output2[i] == EXPECTED_LORA_OUTPUT[i] assert output2[i] == EXPECTED_LORA_OUTPUT[i]
@pytest.mark.skipif(current_platform.is_cuda_alike(),
reason="Skipping to avoid redundant model tests")
@multi_gpu_test(num_gpus=4) @multi_gpu_test(num_gpus=4)
@create_new_process_for_each_test() @create_new_process_for_each_test()
def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files): def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment