Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
91 additions
and
29 deletions
+91
-29
tests/models/decoder_only/language/test_gptq_marlin_24.py
tests/models/decoder_only/language/test_gptq_marlin_24.py
+1
-0
tests/models/decoder_only/language/test_granite.py
tests/models/decoder_only/language/test_granite.py
+1
-0
tests/models/decoder_only/language/test_jamba.py
tests/models/decoder_only/language/test_jamba.py
+2
-0
tests/models/decoder_only/language/test_mamba.py
tests/models/decoder_only/language/test_mamba.py
+1
-0
tests/models/decoder_only/language/test_mistral.py
tests/models/decoder_only/language/test_mistral.py
+1
-0
tests/models/decoder_only/language/test_modelopt.py
tests/models/decoder_only/language/test_modelopt.py
+2
-0
tests/models/decoder_only/language/test_models.py
tests/models/decoder_only/language/test_models.py
+1
-0
tests/models/decoder_only/language/test_phimoe.py
tests/models/decoder_only/language/test_phimoe.py
+1
-0
tests/models/decoder_only/vision_language/test_awq.py
tests/models/decoder_only/vision_language/test_awq.py
+2
-0
tests/models/decoder_only/vision_language/test_intern_vit.py
tests/models/decoder_only/vision_language/test_intern_vit.py
+2
-0
tests/models/decoder_only/vision_language/test_models.py
tests/models/decoder_only/vision_language/test_models.py
+36
-12
tests/models/decoder_only/vision_language/test_phi3v.py
tests/models/decoder_only/vision_language/test_phi3v.py
+2
-0
tests/models/decoder_only/vision_language/test_pixtral.py
tests/models/decoder_only/vision_language/test_pixtral.py
+1
-0
tests/models/decoder_only/vision_language/test_qwen2_vl.py
tests/models/decoder_only/vision_language/test_qwen2_vl.py
+2
-0
tests/models/decoder_only/vision_language/vlm_utils/builders.py
...models/decoder_only/vision_language/vlm_utils/builders.py
+1
-0
tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
.../decoder_only/vision_language/vlm_utils/case_filtering.py
+1
-0
tests/models/decoder_only/vision_language/vlm_utils/core.py
tests/models/decoder_only/vision_language/vlm_utils/core.py
+2
-1
tests/models/decoder_only/vision_language/vlm_utils/custom_inputs.py
...s/decoder_only/vision_language/vlm_utils/custom_inputs.py
+1
-0
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
...els/decoder_only/vision_language/vlm_utils/model_utils.py
+30
-16
tests/models/decoder_only/vision_language/vlm_utils/runners.py
.../models/decoder_only/vision_language/vlm_utils/runners.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/models/decoder_only/language/test_gptq_marlin_24.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of a GPTQ model to a Marlin_24 model.
Note: GPTQ and Marlin_24 do not have bitwise correctness.
...
...
tests/models/decoder_only/language/test_granite.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Granite models using greedy sampling.
Run `pytest tests/models/test_granite.py`.
...
...
tests/models/decoder_only/language/test_jamba.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
os
...
...
tests/models/decoder_only/language/test_mamba.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM when using greedy sampling for Mamba.
Run `pytest tests/models/test_mamba.py`.
...
...
tests/models/decoder_only/language/test_mistral.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
...
...
tests/models/decoder_only/language/test_modelopt.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
"""Tests Model Optimizer fp8 models against ground truth generation
Note: these tests will only pass on H100
...
...
tests/models/decoder_only/language/test_models.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM when using greedy sampling.
Run `pytest tests/models/test_models.py`.
...
...
tests/models/decoder_only/language/test_phimoe.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for moe models using greedy sampling.
Run `pytest tests/models/test_phimoe.py`.
...
...
tests/models/decoder_only/vision_language/test_awq.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Type
import
os
...
...
tests/models/decoder_only/vision_language/test_intern_vit.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
import
os
...
...
tests/models/decoder_only/vision_language/test_models.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Common tests for testing .generate() functionality for single / multiple
image, embedding, and video support for different VLMs in vLLM.
"""
...
...
@@ -9,6 +10,7 @@ from typing import Type
import
os
import
pytest
from
packaging.version
import
Version
from
transformers
import
AutoModelForVision2Seq
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
...
...
@@ -121,6 +123,8 @@ VLM_TEST_SETTINGS = {
else
(
"half"
,
"float"
)),
marks
=
[
pytest
.
mark
.
core_model
],
),
# TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL
# once we upgraded to transformers>=4.49.0.
"qwen2_vl"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-VL-2B-Instruct"
)],
test_type
=
(
...
...
@@ -138,6 +142,26 @@ VLM_TEST_SETTINGS = {
image_size_factors
=
[(),
(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
),
"qwen2_5_vl"
:
VLMTestInfo
(
models
=
[
"Qwen/Qwen2.5-VL-3B-Instruct"
],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
,
VLMTestType
.
VIDEO
),
prompt_formatter
=
lambda
img_prompt
:
f
"<|im_start|>User
\n
{
img_prompt
}
<|im_end|>
\n
<|im_start|>assistant
\n
"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|image_pad|><|vision_end|>"
,
# noqa: E501
video_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|video_pad|><|vision_end|>"
,
# noqa: E501
max_model_len
=
4096
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
qwen2_vllm_to_hf_output
,
image_size_factors
=
[(),
(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
marks
=
[
pytest
.
mark
.
skipif
(
TRANSFORMERS_VERSION
<
"4.49.0"
,
reason
=
"HF model requires transformers>=4.49.0"
,
),
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
),
#### Extended model tests
"aria"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"rhymes-ai/Aria"
)],
...
...
@@ -155,13 +179,7 @@ VLM_TEST_SETTINGS = {
stop_str
=
[
"<|im_end|>"
],
image_size_factors
=
[(
0.10
,
0.15
)],
max_tokens
=
64
,
marks
=
[
pytest
.
mark
.
skipif
(
TRANSFORMERS_VERSION
<
"4.48.0"
,
reason
=
"HF model requires transformers>=4.48.0"
,
),
large_gpu_mark
(
min_gb
=
64
),
],
marks
=
[
large_gpu_mark
(
min_gb
=
64
)],
),
"blip2"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"Salesforce/blip2-opt-2.7b"
)],
...
...
@@ -207,8 +225,8 @@ VLM_TEST_SETTINGS = {
image_size_factors
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.1
,
0.5
,
1.0
)],
marks
=
[
pytest
.
mark
.
skipif
(
TRANSFORMERS_VERSION
>=
"4.48
.0
"
,
reason
=
"HF model is not compatible with transformers>=4.48
.0
"
,
Version
(
TRANSFORMERS_VERSION
)
>=
Version
(
"4.48"
)
,
reason
=
"HF model is not compatible with transformers>=4.48"
,
)
],
),
...
...
@@ -251,17 +269,18 @@ VLM_TEST_SETTINGS = {
max_model_len
=
8192
,
dtype
=
"bfloat16"
,
use_tokenizer_eos
=
True
,
num_logprobs
=
10
,
patch_hf_runner
=
model_utils
.
h2ovl_patch_hf_runner
,
),
"idefics3"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFace
M4/Idefics3-8B-Llama3
"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFace
TB/SmolVLM-256M-Instruct
"
)],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
prompt_formatter
=
lambda
img_prompt
:
f
"<|begin_of_text|>User:
{
img_prompt
}
<end_of_utterance>
\n
Assistant:"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<image>"
,
max_model_len
=
8192
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForVision2Seq
,
marks
=
[
large_gpu_mark
(
min_gb
=
48
)]
,
hf_output_post_proc
=
model_utils
.
idefics3_trunc_hf_output
,
),
"intern_vl"
:
VLMTestInfo
(
models
=
[
...
...
@@ -283,7 +302,6 @@ VLM_TEST_SETTINGS = {
dtype
=
"bfloat16"
,
use_tokenizer_eos
=
True
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
marks
=
[
large_gpu_mark
(
min_gb
=
32
)],
),
"llava_next"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-v1.6-mistral-7b-hf"
)],
...
...
@@ -340,6 +358,12 @@ VLM_TEST_SETTINGS = {
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
mantis_vllm_to_hf_output
,
patch_hf_runner
=
model_utils
.
mantis_patch_hf_runner
,
marks
=
[
pytest
.
mark
.
skipif
(
Version
(
TRANSFORMERS_VERSION
)
>=
Version
(
"4.48"
),
reason
=
"HF model is not compatible with transformers>=4.48"
,
)
],
),
"minicpmv_25"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-Llama3-V-2_5"
)],
...
...
tests/models/decoder_only/vision_language/test_phi3v.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
re
from
typing
import
List
,
Optional
,
Tuple
,
Type
...
...
tests/models/decoder_only/vision_language/test_pixtral.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
...
...
tests/models/decoder_only/vision_language/test_qwen2_vl.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Type
,
TypedDict
,
Union
import
os
...
...
tests/models/decoder_only/vision_language/vlm_utils/builders.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Helpers for building inputs that can be leveraged for different test types.
"""
from
pathlib
import
PosixPath
...
...
tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Utils for determining which subset of model tests belong to a specific
modality, getting all combinations (similar to pytest's parametrization),
handling multimodal placeholder substitution, and so on.
...
...
tests/models/decoder_only/vision_language/vlm_utils/core.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Core test implementation to be shared across modalities."""
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Type
,
Union
...
...
@@ -153,4 +154,4 @@ def process_runner_outputs(
def
process_outputs
(
output_processor
,
model
,
outputs_per_image
):
"""Applies a model specific post-processor function to a runner's output"""
return
[[
output_processor
(
res
,
model
)
for
res
in
outputs
]
for
outputs
in
outputs_per_image
]
\ No newline at end of file
for
outputs
in
outputs_per_image
]
tests/models/decoder_only/vision_language/vlm_utils/custom_inputs.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Custom input builders for edge-cases in different models."""
from
typing
import
Callable
...
...
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Common utility functions relating to different models that are useful
for manipulating the input / output of HF & vLLM test runners, which are
typically specific to a small subset of models.
...
...
@@ -191,6 +192,14 @@ def deepseekvl2_trunc_hf_output(hf_output: RunnerOutput,
return
output_ids
,
output_str
,
out_logprobs
def
idefics3_trunc_hf_output
(
hf_output
:
RunnerOutput
,
model
:
str
)
->
RunnerOutput
:
output_ids
,
output_str
,
out_logprobs
=
hf_output
if
output_str
.
endswith
(
"<end_of_utterance>"
):
output_str
=
output_str
.
split
(
"<end_of_utterance>"
)[
0
]
return
output_ids
,
output_str
,
out_logprobs
def
minicpmv_trunc_hf_output
(
hf_output
:
RunnerOutput
,
model
:
str
)
->
RunnerOutput
:
output_ids
,
output_str
,
out_logprobs
=
hf_output
...
...
@@ -333,12 +342,12 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
__init__
(
self
,
hf_runner
:
HfRunner
):
self
.
num_image_token
=
hf_runner
.
model
.
num_image_token
self
.
tokenizer
=
hf_runner
.
tokenizer
self
.
dtype
=
hf_runner
.
model
.
dtype
self
.
config
=
AutoConfig
.
from_pretrained
(
hf_runner
.
model_name
,
trust_remote_code
=
True
)
self
.
vision_config
=
self
.
config
.
vision_config
self
.
use_thumbnail
=
self
.
config
.
use_thumbnail
self
.
use_msac
=
self
.
config
.
use_msac
self
.
min_num
=
self
.
config
.
min_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
...
...
@@ -347,18 +356,19 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
**
kwargs
):
# yapf: disable
from
vllm.model_executor.models.h2ovl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values
)
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values
_h2ovl
)
# yapf: enable
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
pixel_values
=
[
image_to_pixel_values
(
image
,
self
.
image_size
,
self
.
min_num
,
self
.
max_num
,
self
.
use_thumbnail
,
use_MSAC
=
self
.
config
.
use_msac
).
to
(
self
.
dtype
)
for
image
in
images
image_to_pixel_values_h2ovl
(
image
,
input_size
=
self
.
image_size
,
min_num
=
self
.
min_num
,
max_num
=
self
.
max_num
,
use_thumbnail
=
self
.
use_thumbnail
,
use_msac
=
self
.
use_msac
,
)
for
image
in
images
]
num_patches_list
=
[
pixel_value
.
shape
[
0
]
for
pixel_value
in
pixel_values
...
...
@@ -393,7 +403,6 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
__init__
(
self
,
hf_runner
:
HfRunner
):
self
.
num_image_token
=
hf_runner
.
model
.
num_image_token
self
.
tokenizer
=
hf_runner
.
tokenizer
self
.
dtype
=
hf_runner
.
model
.
dtype
self
.
config
=
AutoConfig
.
from_pretrained
(
hf_runner
.
model_name
,
trust_remote_code
=
True
)
...
...
@@ -406,13 +415,17 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
List
[
Image
]],
**
kwargs
):
from
vllm.model_executor.models.internvl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values
)
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values_internvl
)
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
pixel_values
=
[
image_to_pixel_values
(
image
,
self
.
image_size
,
self
.
min_num
,
self
.
max_num
,
self
.
use_thumbnail
).
to
(
self
.
dtype
)
for
image
in
images
image_to_pixel_values_internvl
(
image
,
input_size
=
self
.
image_size
,
min_num
=
self
.
min_num
,
max_num
=
self
.
max_num
,
use_thumbnail
=
self
.
use_thumbnail
,
)
for
image
in
images
]
num_patches_list
=
[
pixel_value
.
shape
[
0
]
for
pixel_value
in
pixel_values
...
...
@@ -447,7 +460,8 @@ def _internvl_generate(
)
->
torch
.
LongTensor
:
"""Generate method for InternVL2 model without fixed use_cache."""
assert
self
.
img_context_token_id
is
not
None
vit_embeds
=
self
.
extract_feature
(
pixel_values
)
target_dtype
=
next
(
self
.
parameters
()).
dtype
vit_embeds
=
self
.
extract_feature
(
pixel_values
.
to
(
target_dtype
))
input_embeds
=
self
.
language_model
.
get_input_embeddings
()(
input_ids
)
B
,
N
,
C
=
input_embeds
.
shape
input_embeds
=
input_embeds
.
reshape
(
B
*
N
,
C
)
...
...
tests/models/decoder_only/vision_language/vlm_utils/runners.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Entrypoints for wrapping the core run_test implementation for specific test
types / modalities.
"""
...
...
Prev
1
…
13
14
15
16
17
18
19
20
21
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment