Unverified commit 14288d13, authored by Michael Goin and committed by GitHub
Browse files

Disable enforce_eager for V1 TPU sampler and structured output tests (#17016)


Signed-off-by: mgoin <mgoin64@gmail.com>
parent b411418f
...@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \ ...@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \ vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
&& python3 -m pip install pytest pytest-asyncio tpu-info \ && python3 -m pip install pytest pytest-asyncio tpu-info \
&& python3 -m pip install lm_eval[api]==0.4.4 \ && python3 -m pip install lm_eval[api]==0.4.4 \
&& export VLLM_XLA_CACHE_PATH= \
&& export VLLM_USE_V1=1 \ && export VLLM_USE_V1=1 \
&& export VLLM_XLA_CHECK_RECOMPILATION=1 \ && export VLLM_XLA_CHECK_RECOMPILATION=1 \
&& echo HARDWARE \ && echo HARDWARE \
......
...@@ -13,6 +13,7 @@ from pydantic import BaseModel ...@@ -13,6 +13,7 @@ from pydantic import BaseModel
from vllm.entrypoints.llm import LLM from vllm.entrypoints.llm import LLM
from vllm.outputs import RequestOutput from vllm.outputs import RequestOutput
from vllm.platforms import current_platform
from vllm.sampling_params import GuidedDecodingParams, SamplingParams from vllm.sampling_params import GuidedDecodingParams, SamplingParams
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [ PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
...@@ -63,10 +64,13 @@ def test_structured_output( ...@@ -63,10 +64,13 @@ def test_structured_output(
): ):
monkeypatch.setenv("VLLM_USE_V1", "1") monkeypatch.setenv("VLLM_USE_V1", "1")
# Don't use eager execution on TPUs because we want to test for no
# recompilation at runtime
enforce_eager = bool(not current_platform.is_tpu())
# Use a single LLM instance for several scenarios to # Use a single LLM instance for several scenarios to
# speed up the test suite. # speed up the test suite.
llm = LLM(model=model_name, llm = LLM(model=model_name,
enforce_eager=True, enforce_eager=enforce_eager,
max_model_len=1024, max_model_len=1024,
guided_decoding_backend=guided_decoding_backend, guided_decoding_backend=guided_decoding_backend,
tokenizer_mode=tokenizer_mode) tokenizer_mode=tokenizer_mode)
......
...@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str): ...@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
different results. different results.
""" """
llm = LLM(model_name, llm = LLM(model_name,
enforce_eager=True, enforce_eager=False,
max_num_seqs=1, max_num_seqs=1,
max_model_len=512, max_model_len=512,
max_num_batched_tokens=512) max_num_batched_tokens=512)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment