Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
91 additions
and
29 deletions
+91
-29
tests/models/decoder_only/language/test_gptq_marlin_24.py
tests/models/decoder_only/language/test_gptq_marlin_24.py
+1
-0
tests/models/decoder_only/language/test_granite.py
tests/models/decoder_only/language/test_granite.py
+1
-0
tests/models/decoder_only/language/test_jamba.py
tests/models/decoder_only/language/test_jamba.py
+2
-0
tests/models/decoder_only/language/test_mamba.py
tests/models/decoder_only/language/test_mamba.py
+1
-0
tests/models/decoder_only/language/test_mistral.py
tests/models/decoder_only/language/test_mistral.py
+1
-0
tests/models/decoder_only/language/test_modelopt.py
tests/models/decoder_only/language/test_modelopt.py
+2
-0
tests/models/decoder_only/language/test_models.py
tests/models/decoder_only/language/test_models.py
+1
-0
tests/models/decoder_only/language/test_phimoe.py
tests/models/decoder_only/language/test_phimoe.py
+1
-0
tests/models/decoder_only/vision_language/test_awq.py
tests/models/decoder_only/vision_language/test_awq.py
+2
-0
tests/models/decoder_only/vision_language/test_intern_vit.py
tests/models/decoder_only/vision_language/test_intern_vit.py
+2
-0
tests/models/decoder_only/vision_language/test_models.py
tests/models/decoder_only/vision_language/test_models.py
+36
-12
tests/models/decoder_only/vision_language/test_phi3v.py
tests/models/decoder_only/vision_language/test_phi3v.py
+2
-0
tests/models/decoder_only/vision_language/test_pixtral.py
tests/models/decoder_only/vision_language/test_pixtral.py
+1
-0
tests/models/decoder_only/vision_language/test_qwen2_vl.py
tests/models/decoder_only/vision_language/test_qwen2_vl.py
+2
-0
tests/models/decoder_only/vision_language/vlm_utils/builders.py
...models/decoder_only/vision_language/vlm_utils/builders.py
+1
-0
tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
.../decoder_only/vision_language/vlm_utils/case_filtering.py
+1
-0
tests/models/decoder_only/vision_language/vlm_utils/core.py
tests/models/decoder_only/vision_language/vlm_utils/core.py
+2
-1
tests/models/decoder_only/vision_language/vlm_utils/custom_inputs.py
...s/decoder_only/vision_language/vlm_utils/custom_inputs.py
+1
-0
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
...els/decoder_only/vision_language/vlm_utils/model_utils.py
+30
-16
tests/models/decoder_only/vision_language/vlm_utils/runners.py
.../models/decoder_only/vision_language/vlm_utils/runners.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
tests/models/decoder_only/language/test_gptq_marlin_24.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of a GPTQ model to a Marlin_24 model.
"""Compare the outputs of a GPTQ model to a Marlin_24 model.
Note: GPTQ and Marlin_24 do not have bitwise correctness.
Note: GPTQ and Marlin_24 do not have bitwise correctness.
...
...
tests/models/decoder_only/language/test_granite.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Granite models using greedy sampling.
"""Compare the outputs of HF and vLLM for Granite models using greedy sampling.
Run `pytest tests/models/test_granite.py`.
Run `pytest tests/models/test_granite.py`.
...
...
tests/models/decoder_only/language/test_jamba.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
pytest
import
pytest
import
os
import
os
...
...
tests/models/decoder_only/language/test_mamba.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM when using greedy sampling for Mamba.
"""Compare the outputs of HF and vLLM when using greedy sampling for Mamba.
Run `pytest tests/models/test_mamba.py`.
Run `pytest tests/models/test_mamba.py`.
...
...
tests/models/decoder_only/language/test_mistral.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
Run `pytest tests/models/test_mistral.py`.
...
...
tests/models/decoder_only/language/test_modelopt.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
# flake8: noqa
# flake8: noqa
"""Tests Model Optimizer fp8 models against ground truth generation
"""Tests Model Optimizer fp8 models against ground truth generation
Note: these tests will only pass on H100
Note: these tests will only pass on H100
...
...
tests/models/decoder_only/language/test_models.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM when using greedy sampling.
"""Compare the outputs of HF and vLLM when using greedy sampling.
Run `pytest tests/models/test_models.py`.
Run `pytest tests/models/test_models.py`.
...
...
tests/models/decoder_only/language/test_phimoe.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for moe models using greedy sampling.
"""Compare the outputs of HF and vLLM for moe models using greedy sampling.
Run `pytest tests/models/test_phimoe.py`.
Run `pytest tests/models/test_phimoe.py`.
...
...
tests/models/decoder_only/vision_language/test_awq.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Type
from
typing
import
List
,
Optional
,
Type
import
os
import
os
...
...
tests/models/decoder_only/vision_language/test_intern_vit.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Optional
from
typing
import
Optional
import
os
import
os
...
...
tests/models/decoder_only/vision_language/test_models.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Common tests for testing .generate() functionality for single / multiple
"""Common tests for testing .generate() functionality for single / multiple
image, embedding, and video support for different VLMs in vLLM.
image, embedding, and video support for different VLMs in vLLM.
"""
"""
...
@@ -9,6 +10,7 @@ from typing import Type
...
@@ -9,6 +10,7 @@ from typing import Type
import
os
import
os
import
pytest
import
pytest
from
packaging.version
import
Version
from
transformers
import
AutoModelForVision2Seq
from
transformers
import
AutoModelForVision2Seq
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
...
@@ -121,6 +123,8 @@ VLM_TEST_SETTINGS = {
...
@@ -121,6 +123,8 @@ VLM_TEST_SETTINGS = {
else
(
"half"
,
"float"
)),
else
(
"half"
,
"float"
)),
marks
=
[
pytest
.
mark
.
core_model
],
marks
=
[
pytest
.
mark
.
core_model
],
),
),
# TODO(ywang96): Move Qwen2-VL out of core models in favor of Qwen2.5-VL
# once we upgraded to transformers>=4.49.0.
"qwen2_vl"
:
VLMTestInfo
(
"qwen2_vl"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-VL-2B-Instruct"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"Qwen/Qwen2-VL-2B-Instruct"
)],
test_type
=
(
test_type
=
(
...
@@ -138,6 +142,26 @@ VLM_TEST_SETTINGS = {
...
@@ -138,6 +142,26 @@ VLM_TEST_SETTINGS = {
image_size_factors
=
[(),
(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
image_size_factors
=
[(),
(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
marks
=
[
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
),
),
"qwen2_5_vl"
:
VLMTestInfo
(
models
=
[
"Qwen/Qwen2.5-VL-3B-Instruct"
],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
,
VLMTestType
.
VIDEO
),
prompt_formatter
=
lambda
img_prompt
:
f
"<|im_start|>User
\n
{
img_prompt
}
<|im_end|>
\n
<|im_start|>assistant
\n
"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|image_pad|><|vision_end|>"
,
# noqa: E501
video_idx_to_prompt
=
lambda
idx
:
"<|vision_start|><|video_pad|><|vision_end|>"
,
# noqa: E501
max_model_len
=
4096
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
qwen2_vllm_to_hf_output
,
image_size_factors
=
[(),
(
0.25
,),
(
0.25
,
0.25
,
0.25
),
(
0.25
,
0.2
,
0.15
)],
marks
=
[
pytest
.
mark
.
skipif
(
TRANSFORMERS_VERSION
<
"4.49.0"
,
reason
=
"HF model requires transformers>=4.49.0"
,
),
pytest
.
mark
.
core_model
,
pytest
.
mark
.
cpu_model
],
),
#### Extended model tests
#### Extended model tests
"aria"
:
VLMTestInfo
(
"aria"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"rhymes-ai/Aria"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"rhymes-ai/Aria"
)],
...
@@ -155,13 +179,7 @@ VLM_TEST_SETTINGS = {
...
@@ -155,13 +179,7 @@ VLM_TEST_SETTINGS = {
stop_str
=
[
"<|im_end|>"
],
stop_str
=
[
"<|im_end|>"
],
image_size_factors
=
[(
0.10
,
0.15
)],
image_size_factors
=
[(
0.10
,
0.15
)],
max_tokens
=
64
,
max_tokens
=
64
,
marks
=
[
marks
=
[
large_gpu_mark
(
min_gb
=
64
)],
pytest
.
mark
.
skipif
(
TRANSFORMERS_VERSION
<
"4.48.0"
,
reason
=
"HF model requires transformers>=4.48.0"
,
),
large_gpu_mark
(
min_gb
=
64
),
],
),
),
"blip2"
:
VLMTestInfo
(
"blip2"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"Salesforce/blip2-opt-2.7b"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"Salesforce/blip2-opt-2.7b"
)],
...
@@ -207,8 +225,8 @@ VLM_TEST_SETTINGS = {
...
@@ -207,8 +225,8 @@ VLM_TEST_SETTINGS = {
image_size_factors
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.1
,
0.5
,
1.0
)],
image_size_factors
=
[(),
(
1.0
,
),
(
1.0
,
1.0
,
1.0
),
(
0.1
,
0.5
,
1.0
)],
marks
=
[
marks
=
[
pytest
.
mark
.
skipif
(
pytest
.
mark
.
skipif
(
TRANSFORMERS_VERSION
>=
"4.48
.0
"
,
Version
(
TRANSFORMERS_VERSION
)
>=
Version
(
"4.48"
)
,
reason
=
"HF model is not compatible with transformers>=4.48
.0
"
,
reason
=
"HF model is not compatible with transformers>=4.48"
,
)
)
],
],
),
),
...
@@ -251,17 +269,18 @@ VLM_TEST_SETTINGS = {
...
@@ -251,17 +269,18 @@ VLM_TEST_SETTINGS = {
max_model_len
=
8192
,
max_model_len
=
8192
,
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
use_tokenizer_eos
=
True
,
use_tokenizer_eos
=
True
,
num_logprobs
=
10
,
patch_hf_runner
=
model_utils
.
h2ovl_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
h2ovl_patch_hf_runner
,
),
),
"idefics3"
:
VLMTestInfo
(
"idefics3"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFace
M4/Idefics3-8B-Llama3
"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"HuggingFace
TB/SmolVLM-256M-Instruct
"
)],
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
test_type
=
(
VLMTestType
.
IMAGE
,
VLMTestType
.
MULTI_IMAGE
),
prompt_formatter
=
lambda
img_prompt
:
f
"<|begin_of_text|>User:
{
img_prompt
}
<end_of_utterance>
\n
Assistant:"
,
# noqa: E501
prompt_formatter
=
lambda
img_prompt
:
f
"<|begin_of_text|>User:
{
img_prompt
}
<end_of_utterance>
\n
Assistant:"
,
# noqa: E501
img_idx_to_prompt
=
lambda
idx
:
"<image>"
,
img_idx_to_prompt
=
lambda
idx
:
"<image>"
,
max_model_len
=
8192
,
max_model_len
=
8192
,
max_num_seqs
=
2
,
max_num_seqs
=
2
,
auto_cls
=
AutoModelForVision2Seq
,
auto_cls
=
AutoModelForVision2Seq
,
marks
=
[
large_gpu_mark
(
min_gb
=
48
)]
,
hf_output_post_proc
=
model_utils
.
idefics3_trunc_hf_output
,
),
),
"intern_vl"
:
VLMTestInfo
(
"intern_vl"
:
VLMTestInfo
(
models
=
[
models
=
[
...
@@ -283,7 +302,6 @@ VLM_TEST_SETTINGS = {
...
@@ -283,7 +302,6 @@ VLM_TEST_SETTINGS = {
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
use_tokenizer_eos
=
True
,
use_tokenizer_eos
=
True
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
internvl_patch_hf_runner
,
marks
=
[
large_gpu_mark
(
min_gb
=
32
)],
),
),
"llava_next"
:
VLMTestInfo
(
"llava_next"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-v1.6-mistral-7b-hf"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"llava-hf/llava-v1.6-mistral-7b-hf"
)],
...
@@ -340,6 +358,12 @@ VLM_TEST_SETTINGS = {
...
@@ -340,6 +358,12 @@ VLM_TEST_SETTINGS = {
auto_cls
=
AutoModelForVision2Seq
,
auto_cls
=
AutoModelForVision2Seq
,
vllm_output_post_proc
=
model_utils
.
mantis_vllm_to_hf_output
,
vllm_output_post_proc
=
model_utils
.
mantis_vllm_to_hf_output
,
patch_hf_runner
=
model_utils
.
mantis_patch_hf_runner
,
patch_hf_runner
=
model_utils
.
mantis_patch_hf_runner
,
marks
=
[
pytest
.
mark
.
skipif
(
Version
(
TRANSFORMERS_VERSION
)
>=
Version
(
"4.48"
),
reason
=
"HF model is not compatible with transformers>=4.48"
,
)
],
),
),
"minicpmv_25"
:
VLMTestInfo
(
"minicpmv_25"
:
VLMTestInfo
(
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-Llama3-V-2_5"
)],
models
=
[
os
.
path
.
join
(
models_path_prefix
,
"openbmb/MiniCPM-Llama3-V-2_5"
)],
...
...
tests/models/decoder_only/vision_language/test_phi3v.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
os
import
os
import
re
import
re
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
List
,
Optional
,
Tuple
,
Type
...
...
tests/models/decoder_only/vision_language/test_pixtral.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
"""Compare the outputs of HF and vLLM for Mistral models using greedy sampling.
Run `pytest tests/models/test_mistral.py`.
Run `pytest tests/models/test_mistral.py`.
...
...
tests/models/decoder_only/vision_language/test_qwen2_vl.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Type
,
TypedDict
,
Union
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Type
,
TypedDict
,
Union
import
os
import
os
...
...
tests/models/decoder_only/vision_language/vlm_utils/builders.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Helpers for building inputs that can be leveraged for different test types.
"""Helpers for building inputs that can be leveraged for different test types.
"""
"""
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
...
...
tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Utils for determining which subset of model tests belong to a specific
"""Utils for determining which subset of model tests belong to a specific
modality, getting all combinations (similar to pytest's parametrization),
modality, getting all combinations (similar to pytest's parametrization),
handling multimodal placeholder substitution, and so on.
handling multimodal placeholder substitution, and so on.
...
...
tests/models/decoder_only/vision_language/vlm_utils/core.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Core test implementation to be shared across modalities."""
"""Core test implementation to be shared across modalities."""
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Type
,
Union
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Type
,
Union
...
@@ -153,4 +154,4 @@ def process_runner_outputs(
...
@@ -153,4 +154,4 @@ def process_runner_outputs(
def
process_outputs
(
output_processor
,
model
,
outputs_per_image
):
def
process_outputs
(
output_processor
,
model
,
outputs_per_image
):
"""Applies a model specific post-processor function to a runner's output"""
"""Applies a model specific post-processor function to a runner's output"""
return
[[
output_processor
(
res
,
model
)
for
res
in
outputs
]
return
[[
output_processor
(
res
,
model
)
for
res
in
outputs
]
for
outputs
in
outputs_per_image
]
for
outputs
in
outputs_per_image
]
\ No newline at end of file
tests/models/decoder_only/vision_language/vlm_utils/custom_inputs.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Custom input builders for edge-cases in different models."""
"""Custom input builders for edge-cases in different models."""
from
typing
import
Callable
from
typing
import
Callable
...
...
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Common utility functions relating to different models that are useful
"""Common utility functions relating to different models that are useful
for manipulating the input / output of HF & vLLM test runners, which are
for manipulating the input / output of HF & vLLM test runners, which are
typically specific to a small subset of models.
typically specific to a small subset of models.
...
@@ -191,6 +192,14 @@ def deepseekvl2_trunc_hf_output(hf_output: RunnerOutput,
...
@@ -191,6 +192,14 @@ def deepseekvl2_trunc_hf_output(hf_output: RunnerOutput,
return
output_ids
,
output_str
,
out_logprobs
return
output_ids
,
output_str
,
out_logprobs
def
idefics3_trunc_hf_output
(
hf_output
:
RunnerOutput
,
model
:
str
)
->
RunnerOutput
:
output_ids
,
output_str
,
out_logprobs
=
hf_output
if
output_str
.
endswith
(
"<end_of_utterance>"
):
output_str
=
output_str
.
split
(
"<end_of_utterance>"
)[
0
]
return
output_ids
,
output_str
,
out_logprobs
def
minicpmv_trunc_hf_output
(
hf_output
:
RunnerOutput
,
def
minicpmv_trunc_hf_output
(
hf_output
:
RunnerOutput
,
model
:
str
)
->
RunnerOutput
:
model
:
str
)
->
RunnerOutput
:
output_ids
,
output_str
,
out_logprobs
=
hf_output
output_ids
,
output_str
,
out_logprobs
=
hf_output
...
@@ -333,12 +342,12 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -333,12 +342,12 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
__init__
(
self
,
hf_runner
:
HfRunner
):
def
__init__
(
self
,
hf_runner
:
HfRunner
):
self
.
num_image_token
=
hf_runner
.
model
.
num_image_token
self
.
num_image_token
=
hf_runner
.
model
.
num_image_token
self
.
tokenizer
=
hf_runner
.
tokenizer
self
.
tokenizer
=
hf_runner
.
tokenizer
self
.
dtype
=
hf_runner
.
model
.
dtype
self
.
config
=
AutoConfig
.
from_pretrained
(
hf_runner
.
model_name
,
self
.
config
=
AutoConfig
.
from_pretrained
(
hf_runner
.
model_name
,
trust_remote_code
=
True
)
trust_remote_code
=
True
)
self
.
vision_config
=
self
.
config
.
vision_config
self
.
vision_config
=
self
.
config
.
vision_config
self
.
use_thumbnail
=
self
.
config
.
use_thumbnail
self
.
use_thumbnail
=
self
.
config
.
use_thumbnail
self
.
use_msac
=
self
.
config
.
use_msac
self
.
min_num
=
self
.
config
.
min_dynamic_patch
self
.
min_num
=
self
.
config
.
min_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
self
.
image_size
=
self
.
vision_config
.
image_size
...
@@ -347,18 +356,19 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -347,18 +356,19 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
**
kwargs
):
**
kwargs
):
# yapf: disable
# yapf: disable
from
vllm.model_executor.models.h2ovl
import
(
from
vllm.model_executor.models.h2ovl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values
)
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values
_h2ovl
)
# yapf: enable
# yapf: enable
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
pixel_values
=
[
pixel_values
=
[
image_to_pixel_values
(
image
,
image_to_pixel_values_h2ovl
(
self
.
image_size
,
image
,
self
.
min_num
,
input_size
=
self
.
image_size
,
self
.
max_num
,
min_num
=
self
.
min_num
,
self
.
use_thumbnail
,
max_num
=
self
.
max_num
,
use_MSAC
=
self
.
config
.
use_msac
).
to
(
use_thumbnail
=
self
.
use_thumbnail
,
self
.
dtype
)
for
image
in
images
use_msac
=
self
.
use_msac
,
)
for
image
in
images
]
]
num_patches_list
=
[
num_patches_list
=
[
pixel_value
.
shape
[
0
]
for
pixel_value
in
pixel_values
pixel_value
.
shape
[
0
]
for
pixel_value
in
pixel_values
...
@@ -393,7 +403,6 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -393,7 +403,6 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
__init__
(
self
,
hf_runner
:
HfRunner
):
def
__init__
(
self
,
hf_runner
:
HfRunner
):
self
.
num_image_token
=
hf_runner
.
model
.
num_image_token
self
.
num_image_token
=
hf_runner
.
model
.
num_image_token
self
.
tokenizer
=
hf_runner
.
tokenizer
self
.
tokenizer
=
hf_runner
.
tokenizer
self
.
dtype
=
hf_runner
.
model
.
dtype
self
.
config
=
AutoConfig
.
from_pretrained
(
hf_runner
.
model_name
,
self
.
config
=
AutoConfig
.
from_pretrained
(
hf_runner
.
model_name
,
trust_remote_code
=
True
)
trust_remote_code
=
True
)
...
@@ -406,13 +415,17 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -406,13 +415,17 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
List
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
List
[
Image
]],
**
kwargs
):
**
kwargs
):
from
vllm.model_executor.models.internvl
import
(
from
vllm.model_executor.models.internvl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values
)
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
image_to_pixel_values_internvl
)
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
images
=
[
images
]
if
isinstance
(
images
,
Image
)
else
images
pixel_values
=
[
pixel_values
=
[
image_to_pixel_values
(
image
,
self
.
image_size
,
self
.
min_num
,
image_to_pixel_values_internvl
(
self
.
max_num
,
image
,
self
.
use_thumbnail
).
to
(
self
.
dtype
)
input_size
=
self
.
image_size
,
for
image
in
images
min_num
=
self
.
min_num
,
max_num
=
self
.
max_num
,
use_thumbnail
=
self
.
use_thumbnail
,
)
for
image
in
images
]
]
num_patches_list
=
[
num_patches_list
=
[
pixel_value
.
shape
[
0
]
for
pixel_value
in
pixel_values
pixel_value
.
shape
[
0
]
for
pixel_value
in
pixel_values
...
@@ -447,7 +460,8 @@ def _internvl_generate(
...
@@ -447,7 +460,8 @@ def _internvl_generate(
)
->
torch
.
LongTensor
:
)
->
torch
.
LongTensor
:
"""Generate method for InternVL2 model without fixed use_cache."""
"""Generate method for InternVL2 model without fixed use_cache."""
assert
self
.
img_context_token_id
is
not
None
assert
self
.
img_context_token_id
is
not
None
vit_embeds
=
self
.
extract_feature
(
pixel_values
)
target_dtype
=
next
(
self
.
parameters
()).
dtype
vit_embeds
=
self
.
extract_feature
(
pixel_values
.
to
(
target_dtype
))
input_embeds
=
self
.
language_model
.
get_input_embeddings
()(
input_ids
)
input_embeds
=
self
.
language_model
.
get_input_embeddings
()(
input_ids
)
B
,
N
,
C
=
input_embeds
.
shape
B
,
N
,
C
=
input_embeds
.
shape
input_embeds
=
input_embeds
.
reshape
(
B
*
N
,
C
)
input_embeds
=
input_embeds
.
reshape
(
B
*
N
,
C
)
...
...
tests/models/decoder_only/vision_language/vlm_utils/runners.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Entrypoints for wrapping the core run_test implementation for specific test
"""Entrypoints for wrapping the core run_test implementation for specific test
types / modalities.
types / modalities.
"""
"""
...
...
Prev
1
…
13
14
15
16
17
18
19
20
21
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment