Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d1e21a97
Unverified
Commit
d1e21a97
authored
Dec 12, 2024
by
Cyrus Leung
Committed by
GitHub
Dec 12, 2024
Browse files
[CI/Build] Split up VLM tests (#11083)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
72ff3a96
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
94 additions
and
50 deletions
+94
-50
.buildkite/test-pipeline.yaml
.buildkite/test-pipeline.yaml
+21
-11
pyproject.toml
pyproject.toml
+2
-1
tests/models/decoder_only/vision_language/test_models.py
tests/models/decoder_only/vision_language/test_models.py
+46
-26
tests/utils.py
tests/utils.py
+25
-12
No files found.
.buildkite/test-pipeline.yaml
View file @
d1e21a97
...
...
@@ -321,7 +321,7 @@ steps:
##### models test #####
-
label
:
Basic Models Test
#
30
min
-
label
:
Basic Models Test
#
24
min
source_file_dependencies
:
-
vllm/
-
tests/models
...
...
@@ -331,7 +331,7 @@ steps:
-
pytest -v -s models/test_registry.py
-
pytest -v -s models/test_initialization.py
-
label
:
Language Models Test (Standard)
#
4
2min
-
label
:
Language Models Test (Standard)
#
3
2min
#mirror_hardwares: [amd]
source_file_dependencies
:
-
vllm/
...
...
@@ -342,7 +342,7 @@ steps:
-
pytest -v -s models/decoder_only/language -m 'core_model or quant_model'
-
pytest -v -s models/embedding/language -m core_model
-
label
:
Language Models Test (Extended)
#
5
0min
-
label
:
Language Models Test (Extended)
#
1h1
0min
optional
:
true
source_file_dependencies
:
-
vllm/
...
...
@@ -353,7 +353,7 @@ steps:
-
pytest -v -s models/decoder_only/language -m 'not core_model and not quant_model'
-
pytest -v -s models/embedding/language -m 'not core_model'
-
label
:
Multi-Modal Models Test (Standard)
# 2
6
min
-
label
:
Multi-Modal Models Test (Standard)
# 2
8
min
#mirror_hardwares: [amd]
source_file_dependencies
:
-
vllm/
...
...
@@ -369,7 +369,7 @@ steps:
-
pytest -v -s models/encoder_decoder/language -m core_model
-
pytest -v -s models/encoder_decoder/vision_language -m core_model
-
label
:
Multi-Modal Models Test (Extended)
# 1h1
5
m
-
label
:
Multi-Modal Models Test (Extended)
1
# 1h1
6
m
optional
:
true
source_file_dependencies
:
-
vllm/
...
...
@@ -380,14 +380,24 @@ steps:
commands
:
-
pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-
pytest -v -s models/decoder_only/audio_language -m 'not core_model and not quant_model'
-
pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=0) and not core_model and not quant_model'
# HACK - run phi3v tests separately to sidestep this transformers bug
# https://github.com/huggingface/transformers/issues/34307
-
pytest -v -s models/decoder_only/vision_language/test_phi3v.py
-
pytest -v -s --ignore models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
-
pytest -v -s --ignore
models/decoder_only/vision_language/test_models.py --ignore
models/decoder_only/vision_language/test_phi3v.py models/decoder_only/vision_language -m 'not core_model and not quant_model'
-
pytest -v -s models/embedding/vision_language -m 'not core_model'
-
pytest -v -s models/encoder_decoder/language -m 'not core_model'
-
pytest -v -s models/encoder_decoder/vision_language -m 'not core_model'
-
label
:
Multi-Modal Models Test (Extended)
2
# 38m
optional
:
true
source_file_dependencies
:
-
vllm/
-
tests/models/decoder_only/vision_language
commands
:
-
pip install git+https://github.com/TIGER-AI-Lab/Mantis.git
-
pytest -v -s models/decoder_only/vision_language/test_models.py -m 'split(group=1) and not core_model and not quant_model'
# This test is used only in PR development phase to test individual models and should never run on main
-
label
:
Custom Models Test
optional
:
true
...
...
@@ -446,11 +456,11 @@ steps:
-
pytest -v -s ./compile/test_basic_correctness.py
-
pytest -v -s ./compile/test_wrapper.py
-
VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep -q 'Same node test passed'
-
TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m distributed
_2
_gpus
-
TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m
'
distributed
(num
_gpus
=2)'
# Avoid importing model tests that cause CUDA reinitialization error
-
pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed
_2
_gpus
-
pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed
_2
_gpus
-
pytest models/decoder_only/vision_language/test_models.py -v -s -m distributed
_2
_gpus
-
pytest models/encoder_decoder/language/test_bart.py -v -s -m
'
distributed
(num
_gpus
=2)'
-
pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m
'
distributed
(num
_gpus
=2)'
-
pytest models/decoder_only/vision_language/test_models.py -v -s -m
'
distributed
(num
_gpus
=2)'
-
pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
-
pip install -e ./plugins/vllm_add_dummy_model
-
pytest -v -s distributed/test_distributed_oot.py
...
...
@@ -540,7 +550,7 @@ steps:
# see https://github.com/vllm-project/vllm/pull/5689 for details
-
pytest -v -s distributed/test_custom_all_reduce.py
-
torchrun --nproc_per_node=2 distributed/test_ca_buffer_sharing.py
-
TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m distributed
_2
_gpus
-
TARGET_TEST_SUITE=A100 pytest basic_correctness/ -v -s -m
'
distributed
(num
_gpus
=2)'
-
pytest -v -s -x lora/test_mixtral.py
-
label
:
LM Eval Large Models
# optional
...
...
pyproject.toml
View file @
d1e21a97
...
...
@@ -96,7 +96,8 @@ markers = [
"core_model: enable this model test in each PR instead of only nightly"
,
"cpu_model: enable this model test in CPU tests"
,
"quant_model: run this model test under Quantized category"
,
"distributed_2_gpus: run this test only in distributed tests for 2 GPUs"
,
"split: run this test as part of a split"
,
"distributed: run this test only in distributed GPU tests"
,
"skip_v1: do not run this test with v1"
,
"optional: optional tests that are automatically skipped, include --optional to run them"
,
]
tests/models/decoder_only/vision_language/test_models.py
View file @
d1e21a97
"""Common tests for testing .generate() functionality for single / multiple
image, embedding, and video support for different VLMs in vLLM.
"""
import
math
import
os
from
collections
import
defaultdict
from
pathlib
import
PosixPath
from
typing
import
Type
...
...
@@ -10,11 +12,12 @@ from transformers import AutoModelForVision2Seq
from
transformers.utils
import
is_flash_attn_2_available
from
vllm.platforms
import
current_platform
from
vllm.utils
import
cuda_device_count_stateless
,
identity
from
vllm.utils
import
identity
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
,
_VideoAssets
)
from
....utils
import
fork_new_process_for_each_test
,
large_gpu_mark
from
....utils
import
(
fork_new_process_for_each_test
,
large_gpu_mark
,
multi_gpu_marks
)
from
...utils
import
check_outputs_equal
from
.vlm_utils
import
custom_inputs
,
model_utils
,
runners
from
.vlm_utils.case_filtering
import
get_parametrized_options
...
...
@@ -382,7 +385,7 @@ VLM_TEST_SETTINGS = {
prompt_path_encoder
=
model_utils
.
qwen_prompt_path_encoder
,
),
### Tensor parallel / multi-gpu broadcast tests
"
broadcast-chameleon
"
:
VLMTestInfo
(
"
chameleon-broadcast
"
:
VLMTestInfo
(
models
=
[
"facebook/chameleon-7b"
],
prompt_formatter
=
lambda
img_prompt
:
f
"USER:
{
img_prompt
}
\n
ASSISTANT:"
,
max_model_len
=
4096
,
...
...
@@ -393,43 +396,25 @@ VLM_TEST_SETTINGS = {
vllm_output_post_proc
=
lambda
vllm_output
,
model
:
vllm_output
[:
2
],
hf_output_post_proc
=
lambda
hf_output
,
model
:
hf_output
[:
2
],
comparator
=
check_outputs_equal
,
marks
=
[
pytest
.
mark
.
distributed_2_gpus
,
pytest
.
mark
.
skipif
(
cuda_device_count_stateless
()
<
2
,
reason
=
"Need at least 2 GPUs to run the test."
,
),
],
marks
=
multi_gpu_marks
(
num_gpus
=
2
),
**
COMMON_BROADCAST_SETTINGS
# type: ignore
),
"broadcast
-llava
"
:
VLMTestInfo
(
"
llava-
broadcast"
:
VLMTestInfo
(
models
=
[
"llava-hf/llava-1.5-7b-hf"
],
prompt_formatter
=
lambda
img_prompt
:
f
"USER:
{
img_prompt
}
\n
ASSISTANT:"
,
max_model_len
=
4096
,
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
llava_image_vllm_to_hf_output
,
marks
=
[
pytest
.
mark
.
distributed_2_gpus
,
pytest
.
mark
.
skipif
(
cuda_device_count_stateless
()
<
2
,
reason
=
"Need at least 2 GPUs to run the test."
,
)
],
marks
=
multi_gpu_marks
(
num_gpus
=
2
),
**
COMMON_BROADCAST_SETTINGS
# type: ignore
),
"
broadcast-llava_nex
t"
:
VLMTestInfo
(
"
llava_next-broadcas
t"
:
VLMTestInfo
(
models
=
[
"llava-hf/llava-v1.6-mistral-7b-hf"
],
prompt_formatter
=
lambda
img_prompt
:
f
"[INST]
{
img_prompt
}
[/INST]"
,
max_model_len
=
10240
,
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
llava_image_vllm_to_hf_output
,
marks
=
[
pytest
.
mark
.
distributed_2_gpus
,
pytest
.
mark
.
skipif
(
cuda_device_count_stateless
()
<
2
,
reason
=
"Need at least 2 GPUs to run the test."
,
)
],
marks
=
multi_gpu_marks
(
num_gpus
=
2
),
**
COMMON_BROADCAST_SETTINGS
# type: ignore
),
### Custom input edge-cases for specific models
...
...
@@ -468,6 +453,41 @@ VLM_TEST_SETTINGS = {
# yapf: enable
def _mark_splits(
    test_settings: dict[str, VLMTestInfo],
    *,
    num_groups: int,
) -> dict[str, VLMTestInfo]:
    """Tag every test setting with a ``pytest.mark.split(group=i)`` marker.

    The distinct model names referenced by the settings are sorted and cut
    into ``num_groups`` contiguous slices of ``ceil(n_models / num_groups)``
    names each; every setting that lists a model from slice ``i`` is copied
    with ``split(group=i)`` appended to its existing marks.

    The result is keyed by the same names as ``test_settings``. Entries are
    written once per (model, setting) pair, so a setting whose models fall
    into different slices ends up with only the marker of the last slice
    processed. The input mapping and its values are not modified.

    Raises:
        AssertionError: if some key of ``test_settings`` did not make it
            into the result.
    """
    # Settings are looked up by object identity so that the original key can
    # be recovered after grouping the settings by model name.
    setting_name_of: dict[int, str] = {}
    infos_for_model: defaultdict[str, list[VLMTestInfo]] = defaultdict(list)
    for setting_name, setting in test_settings.items():
        setting_name_of[id(setting)] = setting_name
        for model_name in setting.models:
            infos_for_model[model_name].append(setting)

    ordered_models = sorted(infos_for_model)
    chunk = math.ceil(len(ordered_models) / num_groups)

    marked: dict[str, VLMTestInfo] = {}
    for group in range(num_groups):
        start = group * chunk
        for model_name in ordered_models[start:start + chunk]:
            for setting in infos_for_model[model_name]:
                base_marks = setting.marks or []
                marked[setting_name_of[id(setting)]] = setting._replace(
                    marks=base_marks + [pytest.mark.split(group=group)])

    # Sanity check: grouping by model must not lose any setting.
    missing_keys = test_settings.keys() - marked.keys()
    assert not missing_keys, f"Missing keys: {missing_keys}"

    return marked
VLM_TEST_SETTINGS
=
_mark_splits
(
VLM_TEST_SETTINGS
,
num_groups
=
2
)
### Test wrappers
# Wrappers around the core test running func for:
# - single image
...
...
tests/utils.py
View file @
d1e21a97
...
...
@@ -682,10 +682,12 @@ def fork_new_process_for_each_test(
def
large_gpu_mark
(
min_gb
:
int
)
->
pytest
.
MarkDecorator
:
"""Gets a pytest skipif mark, which triggers ig the the device doesn't have
meet a minimum memory requirement in gb; can be leveraged via
@large_gpu_test to skip tests in environments without enough resources, or
called when filtering tests to run directly.
"""
Get a pytest mark, which skips the test if the GPU doesn't meet
a minimum memory requirement in GB.
This can be leveraged via `@large_gpu_test` to skip tests in environments
without enough resources, or called when filtering tests to run directly.
"""
try
:
if
current_platform
.
is_cpu
():
...
...
@@ -712,26 +714,37 @@ def large_gpu_test(*, min_gb: int):
Currently, the CI machine uses L4 GPU which has 24 GB VRAM.
"""
test_skipif
=
large_gpu_mark
(
min_gb
)
mark
=
large_gpu_mark
(
min_gb
)
def
wrapper
(
f
:
Callable
[
_P
,
None
])
->
Callable
[
_P
,
None
]:
return
test_skipif
(
f
)
return
mark
(
f
)
return
wrapper
def
multi_gpu_test
(
*
,
num_gpus
:
int
):
"""
Decorate a test to be run only when multiple GPUs are available.
"""
test_selector
=
getattr
(
pytest
.
mark
,
f
"distributed_
{
num_gpus
}
_gpus"
)
def multi_gpu_marks(*, num_gpus: int):
    """Get a collection of pytest marks to apply for `@multi_gpu_test`.

    Returns a two-element list, in this order:

    1. ``pytest.mark.distributed(num_gpus=num_gpus)``;
    2. a ``pytest.mark.skipif`` whose condition is
       ``cuda_device_count_stateless() < num_gpus``, evaluated once when
       this function is called.
    """
    selector = pytest.mark.distributed(num_gpus=num_gpus)

    too_few_gpus = cuda_device_count_stateless() < num_gpus
    skip_reason = f"Need at least {num_gpus} GPUs to run the test."

    return [selector, pytest.mark.skipif(too_few_gpus, reason=skip_reason)]
def
multi_gpu_test
(
*
,
num_gpus
:
int
):
"""
Decorate a test to be run only when multiple GPUs are available.
"""
marks
=
multi_gpu_marks
(
num_gpus
=
num_gpus
)
def
wrapper
(
f
:
Callable
[
_P
,
None
])
->
Callable
[
_P
,
None
]:
return
test_selector
(
test_skipif
(
fork_new_process_for_each_test
(
f
)))
func
=
fork_new_process_for_each_test
(
f
)
for
mark
in
reversed
(
marks
):
func
=
mark
(
func
)
return
func
return
wrapper
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment