Unverified commit 4e68ae5e, authored by Jee Jee Li, committed by GitHub
Browse files

[CI/Build] Remove V0 LoRA test (#19066)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 4e88723f
...@@ -6,6 +6,8 @@ import pytest ...@@ -6,6 +6,8 @@ import pytest
import vllm.envs as env import vllm.envs as env
from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
from vllm.inputs import TextPrompt from vllm.inputs import TextPrompt
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
...@@ -16,14 +18,6 @@ LORA_RANK = 64 ...@@ -16,14 +18,6 @@ LORA_RANK = 64
DEFAULT_MAX_LORAS = 4 * 3 DEFAULT_MAX_LORAS = 4 * 3
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def get_lora_requests(lora_path) -> list[LoRARequest]: def get_lora_requests(lora_path) -> list[LoRARequest]:
lora_requests: list[LoRARequest] = [ lora_requests: list[LoRARequest] = [
LoRARequest(lora_name=f"{i}", lora_int_id=i, lora_path=lora_path) LoRARequest(lora_name=f"{i}", lora_int_id=i, lora_path=lora_path)
...@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files): ...@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
trust_remote_code=True, trust_remote_code=True,
enforce_eager=True) enforce_eager=True)
# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import importlib
import vllm.engine.async_llm_engine
importlib.reload(vllm.engine.async_llm_engine)
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
# split lora_requests into 3 parts # split lora_requests into 3 parts
part_size = len(lora_requests) // 3 part_size = len(lora_requests) // 3
dummy_run_requests = lora_requests[:part_size] dummy_run_requests = lora_requests[:part_size]
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
import pytest
import vllm import vllm
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
...@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [ ...@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [
] ]
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]: def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
prompts = [ prompts = [
PROMPT_TEMPLATE.format(query="How many singers do we have?"), PROMPT_TEMPLATE.format(query="How many singers do we have?"),
......
...@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [ ...@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [
] ]
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def do_sample(llm: vllm.LLM, def do_sample(llm: vllm.LLM,
lora_path: str, lora_path: str,
lora_id: int, lora_id: int,
......
...@@ -2,26 +2,24 @@ ...@@ -2,26 +2,24 @@
""" """
Script to test add_lora, remove_lora, pin_lora, list_loras functions. Script to test add_lora, remove_lora, pin_lora, list_loras functions.
""" """
import os
import pytest import pytest
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
from vllm.engine.llm_engine import LLMEngine from vllm.engine.llm_engine import LLMEngine
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
from vllm.lora.request import LoRARequest from vllm.lora.request import LoRARequest
MODEL_PATH = "meta-llama/Llama-2-7b-hf" MODEL_PATH = "meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test" LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"
LORA_RANK = 8 LORA_RANK = 8
# @pytest.fixture(autouse=True)
@pytest.fixture(autouse=True) # def v1(run_with_both_engines_lora):
def v1(run_with_both_engines_lora): # # Simple autouse wrapper to run both engines for each test
# Simple autouse wrapper to run both engines for each test # # This can be promoted up to conftest.py to run for every
# This can be promoted up to conftest.py to run for every # # test in a package
# test in a package # pass
pass
def make_lora_request(lora_id: int): def make_lora_request(lora_id: int):
...@@ -79,22 +77,6 @@ def test_lora_functions_sync(): ...@@ -79,22 +77,6 @@ def test_lora_functions_sync():
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_lora_functions_async(): async def test_lora_functions_async():
if os.getenv("VLLM_USE_V1") == "0":
pytest.skip(
reason=
"V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions")
# The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
# environment variable. Reload vllm.engine.async_llm_engine as
# vllm.engine.async_llm_engine.AsyncLLMEngine changes depending on the
# env var.
import importlib
import vllm.engine.async_llm_engine
importlib.reload(vllm.engine.async_llm_engine)
from vllm.entrypoints.openai.api_server import (
build_async_engine_client_from_engine_args)
max_loras = 4 max_loras = 4
engine_args = AsyncEngineArgs(model=MODEL_PATH, engine_args = AsyncEngineArgs(model=MODEL_PATH,
enable_lora=True, enable_lora=True,
......
...@@ -10,14 +10,6 @@ from vllm.platforms import current_platform ...@@ -10,14 +10,6 @@ from vllm.platforms import current_platform
MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1" MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1"
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int, def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int,
prompts: list[str]) -> list[str]: prompts: list[str]) -> list[str]:
......
...@@ -37,14 +37,6 @@ else: ...@@ -37,14 +37,6 @@ else:
] ]
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
def do_sample(llm: vllm.LLM, def do_sample(llm: vllm.LLM,
lora_path: str, lora_path: str,
lora_id: int, lora_id: int,
......
...@@ -13,14 +13,6 @@ from vllm.platforms import current_platform ...@@ -13,14 +13,6 @@ from vllm.platforms import current_platform
from vllm.sampling_params import BeamSearchParams from vllm.sampling_params import BeamSearchParams
@pytest.fixture(autouse=not current_platform.is_cpu())
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
@dataclass @dataclass
class TestConfig: class TestConfig:
model_path: str model_path: str
......
...@@ -6,8 +6,6 @@ import tempfile ...@@ -6,8 +6,6 @@ import tempfile
from typing import Union from typing import Union
from unittest.mock import patch from unittest.mock import patch
import pytest
import vllm.envs as envs import vllm.envs as envs
from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig, from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
ModelConfig, ParallelConfig, SchedulerConfig, ModelConfig, ParallelConfig, SchedulerConfig,
...@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker ...@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker
from vllm.worker.worker import Worker from vllm.worker.worker import Worker
@pytest.fixture(autouse=True)
def v1(run_with_both_engines_lora):
# Simple autouse wrapper to run both engines for each test
# This can be promoted up to conftest.py to run for every
# test in a package
pass
@patch.dict(os.environ, {"RANK": "0"}) @patch.dict(os.environ, {"RANK": "0"})
def test_worker_apply_lora(sql_lora_files): def test_worker_apply_lora(sql_lora_files):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment