Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8a49eea7
Unverified
Commit
8a49eea7
authored
Mar 27, 2025
by
Robert Shaw
Committed by
GitHub
Mar 27, 2025
Browse files
[CI][TPU] Temporarily Disable Quant Test on TPU (#15649)
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
parent
b4245a48
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 5 additions and 7 deletions
+5
-7
.buildkite/run-tpu-v1-test.sh
.buildkite/run-tpu-v1-test.sh
+5
-4
tests/v1/tpu/test_basic.py
tests/v1/tpu/test_basic.py
+0
-3
No files found.
.buildkite/run-tpu-v1-test.sh
View file @
8a49eea7
...
@@ -28,15 +28,16 @@ docker run --privileged --net host --shm-size=16G -it \
...
@@ -28,15 +28,16 @@ docker run --privileged --net host --shm-size=16G -it \
&& echo TEST_3
\
&& echo TEST_3
\
&& pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine
\
&& pytest -v -s /workspace/vllm/tests/entrypoints/llm/test_accuracy.py::test_lm_eval_accuracy_v1_engine
\
&& echo TEST_4
\
&& echo TEST_4
\
&& pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py
\
&& echo TEST_5
\
&& python3 /workspace/vllm/examples/offline_inference/tpu.py
\
&& python3 /workspace/vllm/examples/offline_inference/tpu.py
\
&& echo TEST_6
\
&& echo TEST_5
\
&& pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py
\
&& pytest -s -v /workspace/vllm/tests/tpu/worker/test_tpu_model_runner.py
\
&& echo TEST_7
\
&& echo TEST_6
\
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
\
&& pytest -s -v /workspace/vllm/tests/v1/tpu/test_sampler.py"
\
# TODO: This test fails because it uses RANDOM_SEED sampling
# TODO: This test fails because it uses RANDOM_SEED sampling
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
# && VLLM_USE_V1=1 pytest -v -s /workspace/vllm/tests/tpu/test_custom_dispatcher.py \
# TODO: Re-enable this after fixing recompilation in quantization.
# && echo TEST_4 \
# && pytest -s -v /workspace/vllm/tests/tpu/test_quantization_accuracy.py \
tests/v1/tpu/test_basic.py
View file @
8a49eea7
...
@@ -31,14 +31,12 @@ TENSOR_PARALLEL_SIZES = [1]
...
@@ -31,14 +31,12 @@ TENSOR_PARALLEL_SIZES = [1]
reason
=
"This is a basic test for TPU only"
)
reason
=
"This is a basic test for TPU only"
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"model"
,
MODELS
)
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"enforce_eager"
,
[
True
])
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
TENSOR_PARALLEL_SIZES
)
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
TENSOR_PARALLEL_SIZES
)
def
test_models
(
def
test_models
(
vllm_runner
:
type
[
VllmRunner
],
vllm_runner
:
type
[
VllmRunner
],
monkeypatch
:
pytest
.
MonkeyPatch
,
monkeypatch
:
pytest
.
MonkeyPatch
,
model
:
str
,
model
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
enforce_eager
:
bool
,
tensor_parallel_size
:
int
,
tensor_parallel_size
:
int
,
)
->
None
:
)
->
None
:
prompt
=
"The next numbers of the sequence "
+
", "
.
join
(
prompt
=
"The next numbers of the sequence "
+
", "
.
join
(
...
@@ -51,7 +49,6 @@ def test_models(
...
@@ -51,7 +49,6 @@ def test_models(
with
vllm_runner
(
with
vllm_runner
(
model
,
model
,
max_model_len
=
8192
,
max_model_len
=
8192
,
enforce_eager
=
enforce_eager
,
gpu_memory_utilization
=
0.7
,
gpu_memory_utilization
=
0.7
,
max_num_seqs
=
16
,
max_num_seqs
=
16
,
tensor_parallel_size
=
tensor_parallel_size
)
as
vllm_model
:
tensor_parallel_size
=
tensor_parallel_size
)
as
vllm_model
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment