Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
14288d13
Unverified
Commit
14288d13
authored
Apr 24, 2025
by
Michael Goin
Committed by
GitHub
Apr 24, 2025
Browse files
Disable enforce_eager for V1 TPU sampler and structured output tests (#17016)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
b411418f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
2 deletions
+7
-2
.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
+1
-0
tests/v1/entrypoints/llm/test_struct_output_generate.py
tests/v1/entrypoints/llm/test_struct_output_generate.py
+5
-1
tests/v1/tpu/test_sampler.py
tests/v1/tpu/test_sampler.py
+1
-1
No files found.
.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh
View file @
14288d13
...
@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
...
@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
vllm-tpu /bin/bash
-c
"python3 -m pip install git+https://github.com/thuml/depyf.git
\
vllm-tpu /bin/bash
-c
"python3 -m pip install git+https://github.com/thuml/depyf.git
\
&& python3 -m pip install pytest pytest-asyncio tpu-info
\
&& python3 -m pip install pytest pytest-asyncio tpu-info
\
&& python3 -m pip install lm_eval[api]==0.4.4
\
&& python3 -m pip install lm_eval[api]==0.4.4
\
&& export VLLM_XLA_CACHE_PATH=
\
&& export VLLM_USE_V1=1
\
&& export VLLM_USE_V1=1
\
&& export VLLM_XLA_CHECK_RECOMPILATION=1
\
&& export VLLM_XLA_CHECK_RECOMPILATION=1
\
&& echo HARDWARE
\
&& echo HARDWARE
\
...
...
tests/v1/entrypoints/llm/test_struct_output_generate.py
View file @
14288d13
...
@@ -13,6 +13,7 @@ from pydantic import BaseModel
...
@@ -13,6 +13,7 @@ from pydantic import BaseModel
from
vllm.entrypoints.llm
import
LLM
from
vllm.entrypoints.llm
import
LLM
from
vllm.outputs
import
RequestOutput
from
vllm.outputs
import
RequestOutput
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
from
vllm.sampling_params
import
GuidedDecodingParams
,
SamplingParams
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE
=
[
...
@@ -63,10 +64,13 @@ def test_structured_output(
...
@@ -63,10 +64,13 @@ def test_structured_output(
):
):
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
monkeypatch
.
setenv
(
"VLLM_USE_V1"
,
"1"
)
# Don't use eager execution on TPUs because we want to test for no
# recompilation at runtime
enforce_eager
=
bool
(
not
current_platform
.
is_tpu
())
# Use a single LLM instance for several scenarios to
# Use a single LLM instance for several scenarios to
# speed up the test suite.
# speed up the test suite.
llm
=
LLM
(
model
=
model_name
,
llm
=
LLM
(
model
=
model_name
,
enforce_eager
=
True
,
enforce_eager
=
enforce_eager
,
max_model_len
=
1024
,
max_model_len
=
1024
,
guided_decoding_backend
=
guided_decoding_backend
,
guided_decoding_backend
=
guided_decoding_backend
,
tokenizer_mode
=
tokenizer_mode
)
tokenizer_mode
=
tokenizer_mode
)
...
...
tests/v1/tpu/test_sampler.py
View file @
14288d13
...
@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
...
@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
different results.
different results.
"""
"""
llm
=
LLM
(
model_name
,
llm
=
LLM
(
model_name
,
enforce_eager
=
Tru
e
,
enforce_eager
=
Fals
e
,
max_num_seqs
=
1
,
max_num_seqs
=
1
,
max_model_len
=
512
,
max_model_len
=
512
,
max_num_batched_tokens
=
512
)
max_num_batched_tokens
=
512
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment