Unverified commit 61c7a1b8, authored by Murali Andoorveedu, committed by GitHub
Browse files

[V1] Minor V1 async engine test refactor (#15075)


Signed-off-by: andoorve <murali.andoorveedu@mail.utoronto.ca>
Co-authored-by: andoorve <murali.andoorveedu@mail.utoronto.ca>
parent 374ee287
...@@ -76,21 +76,18 @@ async def generate(engine: AsyncLLM, ...@@ -76,21 +76,18 @@ async def generate(engine: AsyncLLM,
@pytest.mark.parametrize( @pytest.mark.parametrize(
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY]) "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
@pytest.mark.parametrize("engine_args_and_prompt", @pytest.mark.parametrize("engine_args,prompt",
[(TEXT_ENGINE_ARGS, TEXT_PROMPT), [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
(VISION_ENGINE_ARGS, VISION_PROMPT)]) (VISION_ENGINE_ARGS, VISION_PROMPT)])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_load( async def test_load(monkeypatch: pytest.MonkeyPatch,
monkeypatch: pytest.MonkeyPatch,
output_kind: RequestOutputKind, output_kind: RequestOutputKind,
engine_args_and_prompt: tuple[AsyncEngineArgs, PromptType], engine_args: AsyncEngineArgs, prompt: PromptType):
):
# TODO(rickyx): Remove monkeypatch once we have a better way to test V1 # TODO(rickyx): Remove monkeypatch once we have a better way to test V1
# so that in the future when we switch, we don't have to change all the # so that in the future when we switch, we don't have to change all the
# tests. # tests.
with monkeypatch.context() as m, ExitStack() as after: with monkeypatch.context() as m, ExitStack() as after:
m.setenv("VLLM_USE_V1", "1") m.setenv("VLLM_USE_V1", "1")
engine_args, prompt = engine_args_and_prompt
engine = AsyncLLM.from_engine_args(engine_args) engine = AsyncLLM.from_engine_args(engine_args)
after.callback(engine.shutdown) after.callback(engine.shutdown)
...@@ -124,18 +121,16 @@ async def test_load( ...@@ -124,18 +121,16 @@ async def test_load(
@pytest.mark.parametrize( @pytest.mark.parametrize(
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY]) "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
@pytest.mark.parametrize("engine_args_and_prompt", @pytest.mark.parametrize("engine_args,prompt",
[(TEXT_ENGINE_ARGS, TEXT_PROMPT), [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
(VISION_ENGINE_ARGS, VISION_PROMPT)]) (VISION_ENGINE_ARGS, VISION_PROMPT)])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_abort(monkeypatch: pytest.MonkeyPatch, async def test_abort(monkeypatch: pytest.MonkeyPatch,
output_kind: RequestOutputKind, output_kind: RequestOutputKind,
engine_args_and_prompt: tuple[AsyncEngineArgs, engine_args: AsyncEngineArgs, prompt: PromptType):
PromptType]):
with monkeypatch.context() as m, ExitStack() as after: with monkeypatch.context() as m, ExitStack() as after:
m.setenv("VLLM_USE_V1", "1") m.setenv("VLLM_USE_V1", "1")
engine_args, prompt = engine_args_and_prompt
engine = AsyncLLM.from_engine_args(engine_args) engine = AsyncLLM.from_engine_args(engine_args)
after.callback(engine.shutdown) after.callback(engine.shutdown)
...@@ -193,17 +188,15 @@ async def test_abort(monkeypatch: pytest.MonkeyPatch, ...@@ -193,17 +188,15 @@ async def test_abort(monkeypatch: pytest.MonkeyPatch,
@pytest.mark.parametrize("n", [1, 3]) @pytest.mark.parametrize("n", [1, 3])
@pytest.mark.parametrize("engine_args_and_prompt", @pytest.mark.parametrize("engine_args,prompt",
[(TEXT_ENGINE_ARGS, TEXT_PROMPT), [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
(VISION_ENGINE_ARGS, VISION_PROMPT)]) (VISION_ENGINE_ARGS, VISION_PROMPT)])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_finished_flag(monkeypatch, n: int, async def test_finished_flag(monkeypatch: pytest.MonkeyPatch, n: int,
engine_args_and_prompt: tuple[AsyncEngineArgs, engine_args: AsyncEngineArgs, prompt: PromptType):
PromptType]):
with monkeypatch.context() as m, ExitStack() as after: with monkeypatch.context() as m, ExitStack() as after:
m.setenv("VLLM_USE_V1", "1") m.setenv("VLLM_USE_V1", "1")
engine_args, prompt = engine_args_and_prompt
engine = AsyncLLM.from_engine_args(engine_args) engine = AsyncLLM.from_engine_args(engine_args)
after.callback(engine.shutdown) after.callback(engine.shutdown)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment