Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
72dd1595
Unverified
Commit
72dd1595
authored
Sep 20, 2025
by
Woosuk Kwon
Committed by
GitHub
Sep 20, 2025
Browse files
[CI] Skip tests failing on main (#25326)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
572ddf83
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
10 additions
and
2 deletions
+10
-2
tests/entrypoints/openai/test_completion_with_prompt_embeds.py
.../entrypoints/openai/test_completion_with_prompt_embeds.py
+1
-0
tests/models/quantization/test_fp8.py
tests/models/quantization/test_fp8.py
+4
-1
tests/models/test_oot_registration.py
tests/models/test_oot_registration.py
+1
-0
tests/quantization/test_compressed_tensors.py
tests/quantization/test_compressed_tensors.py
+4
-1
No files found.
tests/entrypoints/openai/test_completion_with_prompt_embeds.py
View file @
72dd1595
...
...
@@ -60,6 +60,7 @@ def create_dummy_embeds(num_tokens: int = 5) -> str:
return
base64
.
b64encode
(
buffer
.
getvalue
()).
decode
(
'utf-8'
)
@
pytest
.
mark
.
skip
(
"This test is skipped because it is flaky."
)
@
pytest
.
mark
.
asyncio
@
pytest
.
mark
.
parametrize
(
"model_name"
,
[
MODEL_NAME
])
async
def
test_completions_with_prompt_embeds
(
...
...
tests/models/quantization/test_fp8.py
View file @
72dd1595
...
...
@@ -32,7 +32,7 @@ from ..utils import check_logprobs_close
# Due to low-precision numerical divergence, we only test logprob of 4 tokens
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
4
])
@
pytest
.
mark
.
parametrize
(
"enforce_eager"
,
[
True
])
@
pytest
.
mark
.
parametrize
(
"backend"
,
[
"FLASH_ATTN"
,
"XFORMERS"
])
@
pytest
.
mark
.
parametrize
(
"backend"
,
[
"FLASH_ATTN"
])
# NOTE: Increasing this in this suite will fail CI because we currently cannot
# reset distributed env properly. Use a value > 1 just when you test.
@
pytest
.
mark
.
parametrize
(
"tensor_parallel_size"
,
[
1
])
...
...
@@ -57,6 +57,9 @@ def test_models(
pytest
.
skip
(
f
"
{
kv_cache_dtype
}
is currently not supported on ROCm/HIP."
)
if
not
current_platform
.
is_kv_cache_dtype_supported
(
kv_cache_dtype
,
None
):
pytest
.
skip
(
f
"
{
kv_cache_dtype
}
is not supported on this platform."
)
with
monkeypatch
.
context
()
as
m
:
m
.
setenv
(
"TOKENIZERS_PARALLELISM"
,
'true'
)
m
.
setenv
(
STR_BACKEND_ENV_VAR
,
backend
)
...
...
tests/models/test_oot_registration.py
View file @
72dd1595
...
...
@@ -63,6 +63,7 @@ def test_oot_registration_embedding(
image
=
convert_image_mode
(
ImageAsset
(
"cherry_blossom"
).
pil_image
,
"RGB"
)
@
pytest
.
mark
.
skip
(
reason
=
"This test is skipped because it failed on V1."
)
@
create_new_process_for_each_test
()
def
test_oot_registration_multimodal
(
monkeypatch
:
pytest
.
MonkeyPatch
,
...
...
tests/quantization/test_compressed_tensors.py
View file @
72dd1595
...
...
@@ -357,6 +357,9 @@ def test_compressed_tensors_fp8(vllm_runner):
assert
output
@
pytest
.
mark
.
skipif
(
not
current_platform
.
is_kv_cache_dtype_supported
(
"fp8"
,
None
),
reason
=
"FP8 KV cache is not supported on this device."
)
@
pytest
.
mark
.
skipif
(
not
current_platform
.
is_cuda
(),
reason
=
"This test is skipped on non-CUDA platform."
)
def
test_compressed_tensors_kv_cache
(
vllm_runner
):
...
...
@@ -738,4 +741,4 @@ def test_compressed_tensors_transforms_perplexity(vllm_runner, model, prompt,
with
vllm_runner
(
model
,
enforce_eager
=
True
)
as
llm
:
perplexity
=
llm
.
generate_prompt_perplexity
([
prompt
])[
0
]
print
(
perplexity
)
assert
perplexity
<=
exp_perplexity
\ No newline at end of file
assert
perplexity
<=
exp_perplexity
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment