Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cf069aa8
Unverified
Commit
cf069aa8
authored
Mar 03, 2025
by
Harry Mellor
Committed by
GitHub
Mar 02, 2025
Browse files
Update deprecated Python 3.8 typing (#13971)
parent
bf33700e
Changes
300
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
122 additions
and
130 deletions
+122
-130
tests/lora/test_quant_model.py
tests/lora/test_quant_model.py
+3
-4
tests/lora/test_qwen2vl.py
tests/lora/test_qwen2vl.py
+5
-5
tests/lora/test_transfomers_model.py
tests/lora/test_transfomers_model.py
+2
-4
tests/lora/test_ultravox.py
tests/lora/test_ultravox.py
+3
-4
tests/lora/utils.py
tests/lora/utils.py
+7
-7
tests/metrics/test_metrics.py
tests/metrics/test_metrics.py
+1
-2
tests/mistral_tool_use/utils.py
tests/mistral_tool_use/utils.py
+4
-4
tests/model_executor/test_enabled_custom_ops.py
tests/model_executor/test_enabled_custom_ops.py
+1
-3
tests/models/decoder_only/audio_language/test_ultravox.py
tests/models/decoder_only/audio_language/test_ultravox.py
+8
-8
tests/models/decoder_only/language/test_gguf.py
tests/models/decoder_only/language/test_gguf.py
+3
-3
tests/models/decoder_only/language/test_modelopt.py
tests/models/decoder_only/language/test_modelopt.py
+1
-2
tests/models/decoder_only/vision_language/test_awq.py
tests/models/decoder_only/vision_language/test_awq.py
+3
-3
tests/models/decoder_only/vision_language/test_models.py
tests/models/decoder_only/vision_language/test_models.py
+19
-20
tests/models/decoder_only/vision_language/test_phi3v.py
tests/models/decoder_only/vision_language/test_phi3v.py
+5
-5
tests/models/decoder_only/vision_language/test_pixtral.py
tests/models/decoder_only/vision_language/test_pixtral.py
+6
-6
tests/models/decoder_only/vision_language/test_qwen2_vl.py
tests/models/decoder_only/vision_language/test_qwen2_vl.py
+23
-23
tests/models/decoder_only/vision_language/vlm_utils/builders.py
...models/decoder_only/vision_language/vlm_utils/builders.py
+4
-3
tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
.../decoder_only/vision_language/vlm_utils/case_filtering.py
+5
-5
tests/models/decoder_only/vision_language/vlm_utils/core.py
tests/models/decoder_only/vision_language/vlm_utils/core.py
+11
-11
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
...els/decoder_only/vision_language/vlm_utils/model_utils.py
+8
-8
No files found.
tests/lora/test_quant_model.py
View file @
cf069aa8
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
# Adapted from
# Adapted from
# https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/tests/lora/test_llama.py
# https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/tests/lora/test_llama.py
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
List
import
pytest
import
pytest
...
@@ -19,7 +18,7 @@ class ModelWithQuantization:
...
@@ -19,7 +18,7 @@ class ModelWithQuantization:
quantization
:
str
quantization
:
str
MODELS
:
L
ist
[
ModelWithQuantization
]
MODELS
:
l
ist
[
ModelWithQuantization
]
#AWQ quantization is currently not supported in ROCm.
#AWQ quantization is currently not supported in ROCm.
if
current_platform
.
is_rocm
():
if
current_platform
.
is_rocm
():
MODELS
=
[
MODELS
=
[
...
@@ -41,7 +40,7 @@ else:
...
@@ -41,7 +40,7 @@ else:
def
do_sample
(
llm
:
vllm
.
LLM
,
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_path
:
str
,
lora_id
:
int
,
lora_id
:
int
,
max_tokens
:
int
=
256
)
->
L
ist
[
str
]:
max_tokens
:
int
=
256
)
->
l
ist
[
str
]:
raw_prompts
=
[
raw_prompts
=
[
"Give me an orange-ish brown color"
,
"Give me an orange-ish brown color"
,
"Give me a neon pink color"
,
"Give me a neon pink color"
,
...
@@ -61,7 +60,7 @@ def do_sample(llm: vllm.LLM,
...
@@ -61,7 +60,7 @@ def do_sample(llm: vllm.LLM,
lora_request
=
LoRARequest
(
str
(
lora_id
),
lora_id
,
lora_path
)
lora_request
=
LoRARequest
(
str
(
lora_id
),
lora_id
,
lora_path
)
if
lora_id
else
None
)
if
lora_id
else
None
)
# Print the outputs.
# Print the outputs.
generated_texts
:
L
ist
[
str
]
=
[]
generated_texts
:
l
ist
[
str
]
=
[]
for
output
in
outputs
:
for
output
in
outputs
:
prompt
=
output
.
prompt
prompt
=
output
.
prompt
generated_text
=
output
.
outputs
[
0
].
text
generated_text
=
output
.
outputs
[
0
].
text
...
...
tests/lora/test_qwen2vl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Optional
import
pytest
import
pytest
from
packaging.version
import
Version
from
packaging.version
import
Version
...
@@ -20,7 +20,7 @@ class TestConfig:
...
@@ -20,7 +20,7 @@ class TestConfig:
max_loras
:
int
=
2
max_loras
:
int
=
2
max_lora_rank
:
int
=
16
max_lora_rank
:
int
=
16
max_model_len
:
int
=
4096
max_model_len
:
int
=
4096
mm_processor_kwargs
:
Optional
[
D
ict
[
str
,
int
]]
=
None
mm_processor_kwargs
:
Optional
[
d
ict
[
str
,
int
]]
=
None
def
__post_init__
(
self
):
def
__post_init__
(
self
):
if
self
.
mm_processor_kwargs
is
None
:
if
self
.
mm_processor_kwargs
is
None
:
...
@@ -57,11 +57,11 @@ class Qwen2VLTester:
...
@@ -57,11 +57,11 @@ class Qwen2VLTester:
)
)
def
run_test
(
self
,
def
run_test
(
self
,
images
:
L
ist
[
ImageAsset
],
images
:
l
ist
[
ImageAsset
],
expected_outputs
:
L
ist
[
str
],
expected_outputs
:
l
ist
[
str
],
lora_id
:
Optional
[
int
]
=
None
,
lora_id
:
Optional
[
int
]
=
None
,
temperature
:
float
=
0
,
temperature
:
float
=
0
,
max_tokens
:
int
=
5
)
->
L
ist
[
str
]:
max_tokens
:
int
=
5
)
->
l
ist
[
str
]:
sampling_params
=
vllm
.
SamplingParams
(
sampling_params
=
vllm
.
SamplingParams
(
temperature
=
temperature
,
temperature
=
temperature
,
...
...
tests/lora/test_transfomers_model.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
import
pytest
import
vllm
import
vllm
...
@@ -21,7 +19,7 @@ EXPECTED_LORA_OUTPUT = [
...
@@ -21,7 +19,7 @@ EXPECTED_LORA_OUTPUT = [
]
]
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
)
->
L
ist
[
str
]:
def
do_sample
(
llm
:
vllm
.
LLM
,
lora_path
:
str
,
lora_id
:
int
)
->
l
ist
[
str
]:
prompts
=
[
prompts
=
[
PROMPT_TEMPLATE
.
format
(
query
=
"How many singers do we have?"
),
PROMPT_TEMPLATE
.
format
(
query
=
"How many singers do we have?"
),
PROMPT_TEMPLATE
.
format
(
PROMPT_TEMPLATE
.
format
(
...
@@ -40,7 +38,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
...
@@ -40,7 +38,7 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> List[str]:
lora_request
=
LoRARequest
(
str
(
lora_id
),
lora_id
,
lora_path
)
lora_request
=
LoRARequest
(
str
(
lora_id
),
lora_id
,
lora_path
)
if
lora_id
else
None
)
if
lora_id
else
None
)
# Print the outputs.
# Print the outputs.
generated_texts
:
L
ist
[
str
]
=
[]
generated_texts
:
l
ist
[
str
]
=
[]
for
output
in
outputs
:
for
output
in
outputs
:
prompt
=
output
.
prompt
prompt
=
output
.
prompt
generated_text
=
output
.
outputs
[
0
].
text
.
strip
()
generated_text
=
output
.
outputs
[
0
].
text
.
strip
()
...
...
tests/lora/test_ultravox.py
View file @
cf069aa8
...
@@ -3,7 +3,6 @@
...
@@ -3,7 +3,6 @@
import
shutil
import
shutil
from
os
import
path
from
os
import
path
from
tempfile
import
TemporaryDirectory
from
tempfile
import
TemporaryDirectory
from
typing
import
List
,
Tuple
import
torch
import
torch
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
...
@@ -86,8 +85,8 @@ def test_ultravox_lora(vllm_runner):
...
@@ -86,8 +85,8 @@ def test_ultravox_lora(vllm_runner):
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
max_model_len
=
1024
,
max_model_len
=
1024
,
)
as
vllm_model
:
)
as
vllm_model
:
ultravox_outputs
:
L
ist
[
T
uple
[
ultravox_outputs
:
l
ist
[
t
uple
[
L
ist
[
int
],
str
]]
=
vllm_model
.
generate_greedy
(
l
ist
[
int
],
str
]]
=
vllm_model
.
generate_greedy
(
[
[
_get_prompt
(
0
,
PROMPT
,
VLLM_PLACEHOLDER
,
_get_prompt
(
0
,
PROMPT
,
VLLM_PLACEHOLDER
,
ULTRAVOX_MODEL_NAME
)
ULTRAVOX_MODEL_NAME
)
...
@@ -108,7 +107,7 @@ def test_ultravox_lora(vllm_runner):
...
@@ -108,7 +107,7 @@ def test_ultravox_lora(vllm_runner):
dtype
=
"bfloat16"
,
dtype
=
"bfloat16"
,
max_model_len
=
1024
,
max_model_len
=
1024
,
)
as
vllm_model
:
)
as
vllm_model
:
llama_outputs
:
L
ist
[
T
uple
[
L
ist
[
int
],
str
]]
=
(
llama_outputs
:
l
ist
[
t
uple
[
l
ist
[
int
],
str
]]
=
(
vllm_model
.
generate_greedy
(
vllm_model
.
generate_greedy
(
[
_get_prompt
(
0
,
PROMPT
,
VLLM_PLACEHOLDER
,
LLMA_MODEL_NAME
)],
[
_get_prompt
(
0
,
PROMPT
,
VLLM_PLACEHOLDER
,
LLMA_MODEL_NAME
)],
256
,
256
,
...
...
tests/lora/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
dataclasses
import
dataclass
from
dataclasses
import
dataclass
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Optional
,
Union
import
torch
import
torch
...
@@ -12,7 +12,7 @@ class DummyLoRAManager:
...
@@ -12,7 +12,7 @@ class DummyLoRAManager:
def
__init__
(
self
,
device
:
torch
.
device
=
"cuda:0"
):
def
__init__
(
self
,
device
:
torch
.
device
=
"cuda:0"
):
super
().
__init__
()
super
().
__init__
()
self
.
_loras
:
D
ict
[
str
,
LoRALayerWeights
]
=
{}
self
.
_loras
:
d
ict
[
str
,
LoRALayerWeights
]
=
{}
self
.
_device
=
device
self
.
_device
=
device
def
set_module_lora
(
self
,
module_name
:
str
,
lora
:
LoRALayerWeights
):
def
set_module_lora
(
self
,
module_name
:
str
,
lora
:
LoRALayerWeights
):
...
@@ -77,11 +77,11 @@ class DummyLoRAManager:
...
@@ -77,11 +77,11 @@ class DummyLoRAManager:
self
,
self
,
module_name
:
str
,
module_name
:
str
,
input_dim
:
int
,
input_dim
:
int
,
output_dims
:
L
ist
[
int
],
output_dims
:
l
ist
[
int
],
noop_lora_index
:
Optional
[
L
ist
[
int
]]
=
None
,
noop_lora_index
:
Optional
[
l
ist
[
int
]]
=
None
,
rank
:
int
=
8
,
rank
:
int
=
8
,
):
):
base_loras
:
L
ist
[
LoRALayerWeights
]
=
[]
base_loras
:
l
ist
[
LoRALayerWeights
]
=
[]
noop_lora_index_set
=
set
(
noop_lora_index
or
[])
noop_lora_index_set
=
set
(
noop_lora_index
or
[])
for
i
,
out_dim
in
enumerate
(
output_dims
):
for
i
,
out_dim
in
enumerate
(
output_dims
):
...
@@ -110,7 +110,7 @@ def assert_close(a, b):
...
@@ -110,7 +110,7 @@ def assert_close(a, b):
@
dataclass
@
dataclass
class
PunicaTensors
:
class
PunicaTensors
:
inputs_tensor
:
torch
.
Tensor
inputs_tensor
:
torch
.
Tensor
lora_weights
:
Union
[
torch
.
Tensor
,
L
ist
[
torch
.
Tensor
]]
lora_weights
:
Union
[
torch
.
Tensor
,
l
ist
[
torch
.
Tensor
]]
our_out_tensor
:
torch
.
Tensor
our_out_tensor
:
torch
.
Tensor
ref_out_tensor
:
torch
.
Tensor
ref_out_tensor
:
torch
.
Tensor
b_seq_start_loc
:
torch
.
Tensor
b_seq_start_loc
:
torch
.
Tensor
...
@@ -118,7 +118,7 @@ class PunicaTensors:
...
@@ -118,7 +118,7 @@ class PunicaTensors:
seq_len_tensor
:
torch
.
Tensor
seq_len_tensor
:
torch
.
Tensor
token_lora_mapping
:
torch
.
Tensor
token_lora_mapping
:
torch
.
Tensor
def
meta
(
self
)
->
T
uple
[
int
,
int
]:
def
meta
(
self
)
->
t
uple
[
int
,
int
]:
"""
"""
Infer max_seq_length and token_nums from the tensors
Infer max_seq_length and token_nums from the tensors
and return them.
and return them.
...
...
tests/metrics/test_metrics.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
import
time
import
time
from
typing
import
List
import
pytest
import
pytest
import
ray
import
ray
...
@@ -133,7 +132,7 @@ def test_metric_counter_generation_tokens_multi_step(
...
@@ -133,7 +132,7 @@ def test_metric_counter_generation_tokens_multi_step(
"served_model_name"
,
"served_model_name"
,
[
None
,
[],
[
"ModelName0"
],
[
"ModelName0"
,
"ModelName1"
,
"ModelName2"
]])
[
None
,
[],
[
"ModelName0"
],
[
"ModelName0"
,
"ModelName1"
,
"ModelName2"
]])
def
test_metric_set_tag_model_name
(
vllm_runner
,
model
:
str
,
dtype
:
str
,
def
test_metric_set_tag_model_name
(
vllm_runner
,
model
:
str
,
dtype
:
str
,
served_model_name
:
L
ist
[
str
])
->
None
:
served_model_name
:
l
ist
[
str
])
->
None
:
with
vllm_runner
(
model
,
with
vllm_runner
(
model
,
dtype
=
dtype
,
dtype
=
dtype
,
disable_log_stats
=
False
,
disable_log_stats
=
False
,
...
...
tests/mistral_tool_use/utils.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Dict
,
List
,
Optional
from
typing
import
Optional
from
typing_extensions
import
TypedDict
from
typing_extensions
import
TypedDict
class
ServerConfig
(
TypedDict
,
total
=
False
):
class
ServerConfig
(
TypedDict
,
total
=
False
):
model
:
str
model
:
str
arguments
:
L
ist
[
str
]
arguments
:
l
ist
[
str
]
system_prompt
:
Optional
[
str
]
system_prompt
:
Optional
[
str
]
supports_parallel
:
Optional
[
bool
]
supports_parallel
:
Optional
[
bool
]
supports_rocm
:
Optional
[
bool
]
supports_rocm
:
Optional
[
bool
]
ARGS
:
L
ist
[
str
]
=
[
"--max-model-len"
,
"1024"
]
ARGS
:
l
ist
[
str
]
=
[
"--max-model-len"
,
"1024"
]
CONFIGS
:
D
ict
[
str
,
ServerConfig
]
=
{
CONFIGS
:
d
ict
[
str
,
ServerConfig
]
=
{
"mistral"
:
{
"mistral"
:
{
"model"
:
"model"
:
"mistralai/Mistral-7B-Instruct-v0.3"
,
"mistralai/Mistral-7B-Instruct-v0.3"
,
...
...
tests/model_executor/test_enabled_custom_ops.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
import
pytest
import
pytest
from
vllm.config
import
CompilationConfig
,
VllmConfig
,
set_current_vllm_config
from
vllm.config
import
CompilationConfig
,
VllmConfig
,
set_current_vllm_config
...
@@ -51,7 +49,7 @@ class Relu3(ReLUSquaredActivation):
...
@@ -51,7 +49,7 @@ class Relu3(ReLUSquaredActivation):
# All but RMSNorm
# All but RMSNorm
(
"all,-rms_norm"
,
4
,
[
0
,
1
,
1
,
1
],
True
),
(
"all,-rms_norm"
,
4
,
[
0
,
1
,
1
,
1
],
True
),
])
])
def
test_enabled_ops
(
env
:
str
,
torch_level
:
int
,
ops_enabled
:
L
ist
[
int
],
def
test_enabled_ops
(
env
:
str
,
torch_level
:
int
,
ops_enabled
:
l
ist
[
int
],
default_on
:
bool
):
default_on
:
bool
):
vllm_config
=
VllmConfig
(
compilation_config
=
CompilationConfig
(
vllm_config
=
VllmConfig
(
compilation_config
=
CompilationConfig
(
level
=
torch_level
,
custom_ops
=
env
.
split
(
","
)))
level
=
torch_level
,
custom_ops
=
env
.
split
(
","
)))
...
...
tests/models/decoder_only/audio_language/test_ultravox.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
...
@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
...
@@ -17,7 +17,7 @@ from ...utils import check_logprobs_close
MODEL_NAME
=
"fixie-ai/ultravox-v0_4"
MODEL_NAME
=
"fixie-ai/ultravox-v0_4"
AudioTuple
=
T
uple
[
np
.
ndarray
,
int
]
AudioTuple
=
t
uple
[
np
.
ndarray
,
int
]
VLLM_PLACEHOLDER
=
"<|audio|>"
VLLM_PLACEHOLDER
=
"<|audio|>"
HF_PLACEHOLDER
=
"<|audio|>"
HF_PLACEHOLDER
=
"<|audio|>"
...
@@ -78,7 +78,7 @@ def _get_prompt(audio_count, question, placeholder):
...
@@ -78,7 +78,7 @@ def _get_prompt(audio_count, question, placeholder):
add_generation_prompt
=
True
)
add_generation_prompt
=
True
)
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
def
vllm_to_hf_output
(
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
Optional
[
SampleLogprobs
]],
model
:
str
):
model
:
str
):
"""Sanitize vllm output to be comparable with hf output."""
"""Sanitize vllm output to be comparable with hf output."""
...
@@ -96,9 +96,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
...
@@ -96,9 +96,9 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts_and_audios
:
L
ist
[
T
uple
[
str
,
str
,
AudioTuple
]],
prompts_and_audios
:
l
ist
[
t
uple
[
str
,
str
,
AudioTuple
]],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
@@ -158,8 +158,8 @@ def run_test(
...
@@ -158,8 +158,8 @@ def run_test(
def
run_multi_audio_test
(
def
run_multi_audio_test
(
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
prompts_and_audios
:
L
ist
[
T
uple
[
str
,
L
ist
[
AudioTuple
]]],
prompts_and_audios
:
l
ist
[
t
uple
[
str
,
l
ist
[
AudioTuple
]]],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
...
tests/models/decoder_only/language/test_gguf.py
View file @
cf069aa8
...
@@ -5,7 +5,7 @@ Note: To pass the test, quantization higher than Q4 should be used
...
@@ -5,7 +5,7 @@ Note: To pass the test, quantization higher than Q4 should be used
"""
"""
import
os
import
os
from
typing
import
List
,
NamedTuple
,
Type
from
typing
import
NamedTuple
import
pytest
import
pytest
from
huggingface_hub
import
hf_hub_download
from
huggingface_hub
import
hf_hub_download
...
@@ -90,8 +90,8 @@ MODELS = [
...
@@ -90,8 +90,8 @@ MODELS = [
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
1
,
2
])
@
pytest
.
mark
.
parametrize
(
"tp_size"
,
[
1
,
2
])
def
test_models
(
def
test_models
(
num_gpus_available
:
int
,
num_gpus_available
:
int
,
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
example_prompts
:
L
ist
[
str
],
example_prompts
:
l
ist
[
str
],
model
:
GGUFTestConfig
,
model
:
GGUFTestConfig
,
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
...
...
tests/models/decoder_only/language/test_modelopt.py
View file @
cf069aa8
...
@@ -5,7 +5,6 @@
...
@@ -5,7 +5,6 @@
Note: these tests will only pass on H100
Note: these tests will only pass on H100
"""
"""
import
os
import
os
from
typing
import
List
import
pytest
import
pytest
from
transformers
import
AutoTokenizer
from
transformers
import
AutoTokenizer
...
@@ -65,7 +64,7 @@ def test_models(example_prompts, model_name) -> None:
...
@@ -65,7 +64,7 @@ def test_models(example_prompts, model_name) -> None:
for
prompt
in
example_prompts
for
prompt
in
example_prompts
]
]
params
=
SamplingParams
(
max_tokens
=
20
,
temperature
=
0
)
params
=
SamplingParams
(
max_tokens
=
20
,
temperature
=
0
)
generations
:
L
ist
[
str
]
=
[]
generations
:
l
ist
[
str
]
=
[]
# Note: these need to be run 1 at a time due to numerical precision,
# Note: these need to be run 1 at a time due to numerical precision,
# since the expected strs were generated this way.
# since the expected strs were generated this way.
for
prompt
in
formatted_prompts
:
for
prompt
in
formatted_prompts
:
...
...
tests/models/decoder_only/vision_language/test_awq.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
List
,
Optional
,
Type
from
typing
import
Optional
import
pytest
import
pytest
import
torch
import
torch
...
@@ -19,12 +19,12 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
...
@@ -19,12 +19,12 @@ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
def
run_awq_test
(
def
run_awq_test
(
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
,
image_assets
:
_ImageAssets
,
source_model
:
str
,
source_model
:
str
,
quant_model
:
str
,
quant_model
:
str
,
*
,
*
,
size_factors
:
L
ist
[
float
],
size_factors
:
l
ist
[
float
],
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
num_logprobs
:
int
,
num_logprobs
:
int
,
...
...
tests/models/decoder_only/vision_language/test_models.py
View file @
cf069aa8
...
@@ -6,7 +6,6 @@ import math
...
@@ -6,7 +6,6 @@ import math
import
os
import
os
from
collections
import
defaultdict
from
collections
import
defaultdict
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
Type
import
pytest
import
pytest
from
packaging.version
import
Version
from
packaging.version
import
Version
...
@@ -562,8 +561,8 @@ VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)
...
@@ -562,8 +561,8 @@ VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)
))
))
def
test_single_image_models
(
tmp_path
:
PosixPath
,
model_type
:
str
,
def
test_single_image_models
(
tmp_path
:
PosixPath
,
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_single_image_test
(
runners
.
run_single_image_test
(
...
@@ -585,8 +584,8 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
...
@@ -585,8 +584,8 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str,
))
))
def
test_multi_image_models
(
tmp_path
:
PosixPath
,
model_type
:
str
,
def
test_multi_image_models
(
tmp_path
:
PosixPath
,
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_multi_image_test
(
runners
.
run_multi_image_test
(
...
@@ -608,8 +607,8 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
...
@@ -608,8 +607,8 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str,
))
))
def
test_image_embedding_models
(
model_type
:
str
,
def
test_image_embedding_models
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_embedding_test
(
runners
.
run_embedding_test
(
...
@@ -629,7 +628,7 @@ def test_image_embedding_models(model_type: str,
...
@@ -629,7 +628,7 @@ def test_image_embedding_models(model_type: str,
fork_new_process_for_each_test
=
False
,
fork_new_process_for_each_test
=
False
,
))
))
def
test_video_models
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
def
test_video_models
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
video_assets
:
_VideoAssets
):
video_assets
:
_VideoAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_video_test
(
runners
.
run_video_test
(
...
@@ -651,8 +650,8 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
...
@@ -651,8 +650,8 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs,
def
test_custom_inputs_models
(
def
test_custom_inputs_models
(
model_type
:
str
,
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
):
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_custom_inputs_test
(
runners
.
run_custom_inputs_test
(
...
@@ -674,8 +673,8 @@ def test_custom_inputs_models(
...
@@ -674,8 +673,8 @@ def test_custom_inputs_models(
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
def
test_single_image_models_heavy
(
tmp_path
:
PosixPath
,
model_type
:
str
,
def
test_single_image_models_heavy
(
tmp_path
:
PosixPath
,
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_single_image_test
(
runners
.
run_single_image_test
(
...
@@ -698,8 +697,8 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
...
@@ -698,8 +697,8 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str,
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
def
test_multi_image_models_heavy
(
tmp_path
:
PosixPath
,
model_type
:
str
,
def
test_multi_image_models_heavy
(
tmp_path
:
PosixPath
,
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_multi_image_test
(
runners
.
run_multi_image_test
(
...
@@ -722,8 +721,8 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
...
@@ -722,8 +721,8 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str,
@
fork_new_process_for_each_test
@
fork_new_process_for_each_test
def
test_image_embedding_models_heavy
(
model_type
:
str
,
def
test_image_embedding_models_heavy
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
image_assets
:
_ImageAssets
):
image_assets
:
_ImageAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_embedding_test
(
runners
.
run_embedding_test
(
...
@@ -743,8 +742,8 @@ def test_image_embedding_models_heavy(model_type: str,
...
@@ -743,8 +742,8 @@ def test_image_embedding_models_heavy(model_type: str,
fork_new_process_for_each_test
=
True
,
fork_new_process_for_each_test
=
True
,
))
))
def
test_video_models_heavy
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
def
test_video_models_heavy
(
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
video_assets
:
_VideoAssets
):
video_assets
:
_VideoAssets
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_video_test
(
runners
.
run_video_test
(
...
@@ -767,8 +766,8 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
...
@@ -767,8 +766,8 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs,
def
test_custom_inputs_models_heavy
(
def
test_custom_inputs_models_heavy
(
model_type
:
str
,
model_type
:
str
,
test_case
:
ExpandableVLMTestArgs
,
test_case
:
ExpandableVLMTestArgs
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
):
):
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
model_test_info
=
VLM_TEST_SETTINGS
[
model_type
]
runners
.
run_custom_inputs_test
(
runners
.
run_custom_inputs_test
(
...
...
tests/models/decoder_only/vision_language/test_phi3v.py
View file @
cf069aa8
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
import
os
import
os
import
re
import
re
from
typing
import
List
,
Optional
,
Tuple
,
Type
from
typing
import
Optional
import
pytest
import
pytest
from
transformers
import
AutoTokenizer
from
transformers
import
AutoTokenizer
...
@@ -25,7 +25,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these
...
@@ -25,7 +25,7 @@ HF_MULTIIMAGE_IMAGE_PROMPT = "<|user|>\n<|image_1|>\n<|image_2|>\nDescribe these
models
=
[
"microsoft/Phi-3.5-vision-instruct"
]
models
=
[
"microsoft/Phi-3.5-vision-instruct"
]
def
vllm_to_hf_output
(
vllm_output
:
T
uple
[
L
ist
[
int
],
str
,
def
vllm_to_hf_output
(
vllm_output
:
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]],
Optional
[
SampleLogprobs
]],
model
:
str
):
model
:
str
):
"""Sanitize vllm output to be comparable with hf output."""
"""Sanitize vllm output to be comparable with hf output."""
...
@@ -55,9 +55,9 @@ if current_platform.is_rocm():
...
@@ -55,9 +55,9 @@ if current_platform.is_rocm():
def
run_test
(
def
run_test
(
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
]],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
]],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
...
tests/models/decoder_only/vision_language/test_pixtral.py
View file @
cf069aa8
...
@@ -6,7 +6,7 @@ Run `pytest tests/models/test_mistral.py`.
...
@@ -6,7 +6,7 @@ Run `pytest tests/models/test_mistral.py`.
import
json
import
json
import
uuid
import
uuid
from
dataclasses
import
asdict
from
dataclasses
import
asdict
from
typing
import
TYPE_CHECKING
,
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
TYPE_CHECKING
,
Any
,
Optional
import
pytest
import
pytest
from
mistral_common.multimodal
import
download_image
from
mistral_common.multimodal
import
download_image
...
@@ -38,7 +38,7 @@ IMG_URLS = [
...
@@ -38,7 +38,7 @@ IMG_URLS = [
PROMPT
=
"Describe each image in one short sentence."
PROMPT
=
"Describe each image in one short sentence."
def
_create_msg_format
(
urls
:
L
ist
[
str
])
->
L
ist
[
D
ict
[
str
,
Any
]]:
def
_create_msg_format
(
urls
:
l
ist
[
str
])
->
l
ist
[
d
ict
[
str
,
Any
]]:
return
[{
return
[{
"role"
:
"role"
:
"user"
,
"user"
,
...
@@ -54,7 +54,7 @@ def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
...
@@ -54,7 +54,7 @@ def _create_msg_format(urls: List[str]) -> List[Dict[str, Any]]:
}]
}]
def
_create_msg_format_hf
(
urls
:
L
ist
[
str
])
->
L
ist
[
D
ict
[
str
,
Any
]]:
def
_create_msg_format_hf
(
urls
:
l
ist
[
str
])
->
l
ist
[
d
ict
[
str
,
Any
]]:
return
[{
return
[{
"role"
:
"role"
:
"user"
,
"user"
,
...
@@ -68,7 +68,7 @@ def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
...
@@ -68,7 +68,7 @@ def _create_msg_format_hf(urls: List[str]) -> List[Dict[str, Any]]:
}]
}]
def
_create_engine_inputs
(
urls
:
L
ist
[
str
])
->
TokensPrompt
:
def
_create_engine_inputs
(
urls
:
l
ist
[
str
])
->
TokensPrompt
:
msg
=
_create_msg_format
(
urls
)
msg
=
_create_msg_format
(
urls
)
tokenizer
=
MistralTokenizer
.
from_model
(
"pixtral"
)
tokenizer
=
MistralTokenizer
.
from_model
(
"pixtral"
)
...
@@ -89,7 +89,7 @@ def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
...
@@ -89,7 +89,7 @@ def _create_engine_inputs(urls: List[str]) -> TokensPrompt:
return
engine_inputs
return
engine_inputs
def
_create_engine_inputs_hf
(
urls
:
L
ist
[
str
])
->
TextPrompt
:
def
_create_engine_inputs_hf
(
urls
:
l
ist
[
str
])
->
TextPrompt
:
msg
=
_create_msg_format_hf
(
urls
)
msg
=
_create_msg_format_hf
(
urls
)
tokenizer
=
AutoProcessor
.
from_pretrained
(
"mistral-community/pixtral-12b"
)
tokenizer
=
AutoProcessor
.
from_pretrained
(
"mistral-community/pixtral-12b"
)
...
@@ -128,7 +128,7 @@ assert FIXTURES_PATH.exists()
...
@@ -128,7 +128,7 @@ assert FIXTURES_PATH.exists()
FIXTURE_LOGPROBS_CHAT
=
FIXTURES_PATH
/
"pixtral_chat.json"
FIXTURE_LOGPROBS_CHAT
=
FIXTURES_PATH
/
"pixtral_chat.json"
FIXTURE_LOGPROBS_ENGINE
=
FIXTURES_PATH
/
"pixtral_chat_engine.json"
FIXTURE_LOGPROBS_ENGINE
=
FIXTURES_PATH
/
"pixtral_chat_engine.json"
OutputsLogprobs
=
L
ist
[
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]]
OutputsLogprobs
=
l
ist
[
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]]
# For the test author to store golden output in JSON
# For the test author to store golden output in JSON
...
...
tests/models/decoder_only/vision_language/test_qwen2_vl.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
from
typing
import
Any
,
List
,
Optional
,
Tuple
,
Type
,
TypedDict
,
Union
from
typing
import
Any
,
Optional
,
TypedDict
,
Union
import
numpy.typing
as
npt
import
numpy.typing
as
npt
import
pytest
import
pytest
...
@@ -69,21 +69,21 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):
...
@@ -69,21 +69,21 @@ class Qwen2VLPromptVideoEmbeddingInput(TypedDict):
def
batch_make_image_embeddings
(
def
batch_make_image_embeddings
(
image_batches
:
L
ist
[
Union
[
Image
.
Image
,
L
ist
[
Image
.
Image
]]],
processor
,
image_batches
:
l
ist
[
Union
[
Image
.
Image
,
l
ist
[
Image
.
Image
]]],
processor
,
llm
:
VllmRunner
)
->
L
ist
[
Qwen2VLPromptImageEmbeddingInput
]:
llm
:
VllmRunner
)
->
l
ist
[
Qwen2VLPromptImageEmbeddingInput
]:
"""batched image embeddings for Qwen2-VL
"""batched image embeddings for Qwen2-VL
This will infer all images' embeddings in a single batch,
This will infer all images' embeddings in a single batch,
and split the result according to input batches.
and split the result according to input batches.
image_batches:
image_batches:
- Single-image batches: `
L
ist[Image.Image]`
- Single-image batches: `
l
ist[Image.Image]`
- Multiple-image batches: `
L
ist[
L
ist[Image.Image]]]`
- Multiple-image batches: `
l
ist[
l
ist[Image.Image]]]`
returns: `
L
ist[Qwen2VLPromptImageEmbeddingInput]`
returns: `
l
ist[Qwen2VLPromptImageEmbeddingInput]`
"""
"""
image_batches_
:
L
ist
[
Any
]
=
image_batches
[:]
image_batches_
:
l
ist
[
Any
]
=
image_batches
[:]
# convert single-image batches to multiple-image batches
# convert single-image batches to multiple-image batches
for
idx
in
range
(
len
(
image_batches_
)):
for
idx
in
range
(
len
(
image_batches_
)):
...
@@ -93,7 +93,7 @@ def batch_make_image_embeddings(
...
@@ -93,7 +93,7 @@ def batch_make_image_embeddings(
assert
isinstance
(
image_batches_
[
idx
],
list
)
assert
isinstance
(
image_batches_
[
idx
],
list
)
# append all images into a list (as a batch)
# append all images into a list (as a batch)
images
:
L
ist
[
Image
.
Image
]
=
[]
images
:
l
ist
[
Image
.
Image
]
=
[]
for
image_batch
in
image_batches_
:
for
image_batch
in
image_batches_
:
images
+=
image_batch
images
+=
image_batch
...
@@ -121,7 +121,7 @@ def batch_make_image_embeddings(
...
@@ -121,7 +121,7 @@ def batch_make_image_embeddings(
image_embeds
=
torch
.
concat
(
llm
.
apply_model
(
get_image_embeds
))
image_embeds
=
torch
.
concat
(
llm
.
apply_model
(
get_image_embeds
))
# split into original batches
# split into original batches
result
:
L
ist
[
Qwen2VLPromptImageEmbeddingInput
]
=
[]
result
:
l
ist
[
Qwen2VLPromptImageEmbeddingInput
]
=
[]
image_counter
=
0
image_counter
=
0
embed_counter
=
0
embed_counter
=
0
for
image_batch
in
image_batches_
:
for
image_batch
in
image_batches_
:
...
@@ -153,7 +153,7 @@ def batch_make_image_embeddings(
...
@@ -153,7 +153,7 @@ def batch_make_image_embeddings(
def
batch_make_video_embeddings
(
def
batch_make_video_embeddings
(
video_batches
:
PromptVideoInput
,
processor
,
video_batches
:
PromptVideoInput
,
processor
,
llm
:
VllmRunner
)
->
L
ist
[
Qwen2VLPromptVideoEmbeddingInput
]:
llm
:
VllmRunner
)
->
l
ist
[
Qwen2VLPromptVideoEmbeddingInput
]:
"""batched video embeddings for Qwen2-VL
"""batched video embeddings for Qwen2-VL
A NDArray represents a single video's all frames.
A NDArray represents a single video's all frames.
...
@@ -162,21 +162,21 @@ def batch_make_video_embeddings(
...
@@ -162,21 +162,21 @@ def batch_make_video_embeddings(
and split the result according to input batches.
and split the result according to input batches.
video_batches:
video_batches:
- Single-video batches: `
L
ist[NDArray]`
- Single-video batches: `
l
ist[NDArray]`
- Multiple-video batches: `
L
ist[
L
ist[NDArray]]`
- Multiple-video batches: `
l
ist[
l
ist[NDArray]]`
"""
"""
video_batches_
:
L
ist
[
Any
]
=
video_batches
[:]
video_batches_
:
l
ist
[
Any
]
=
video_batches
[:]
for
idx
in
range
(
len
(
video_batches_
)):
for
idx
in
range
(
len
(
video_batches_
)):
if
not
isinstance
(
video_batches_
[
idx
],
list
):
if
not
isinstance
(
video_batches_
[
idx
],
list
):
single_video_batch
:
L
ist
[
npt
.
NDArray
]
=
[
video_batches_
[
idx
]]
single_video_batch
:
l
ist
[
npt
.
NDArray
]
=
[
video_batches_
[
idx
]]
video_batches_
[
idx
]
=
single_video_batch
video_batches_
[
idx
]
=
single_video_batch
assert
isinstance
(
video_batches_
[
idx
],
list
)
assert
isinstance
(
video_batches_
[
idx
],
list
)
# append all videos into a list (as a batch)
# append all videos into a list (as a batch)
videos
:
L
ist
[
npt
.
NDArray
]
=
[]
videos
:
l
ist
[
npt
.
NDArray
]
=
[]
for
video_batch
in
video_batches_
:
for
video_batch
in
video_batches_
:
videos
+=
video_batch
videos
+=
video_batch
...
@@ -204,7 +204,7 @@ def batch_make_video_embeddings(
...
@@ -204,7 +204,7 @@ def batch_make_video_embeddings(
video_embeds
=
torch
.
concat
(
llm
.
apply_model
(
get_image_embeds
))
video_embeds
=
torch
.
concat
(
llm
.
apply_model
(
get_image_embeds
))
# split into original batches
# split into original batches
result
:
L
ist
[
Qwen2VLPromptVideoEmbeddingInput
]
=
[]
result
:
l
ist
[
Qwen2VLPromptVideoEmbeddingInput
]
=
[]
video_counter
=
0
video_counter
=
0
embed_counter
=
0
embed_counter
=
0
for
video_batch
in
video_batches_
:
for
video_batch
in
video_batches_
:
...
@@ -235,8 +235,8 @@ def batch_make_video_embeddings(
...
@@ -235,8 +235,8 @@ def batch_make_video_embeddings(
def
run_embedding_input_test
(
def
run_embedding_input_test
(
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]],
model
:
str
,
model
:
str
,
*
,
*
,
dtype
:
str
,
dtype
:
str
,
...
@@ -323,8 +323,8 @@ def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model,
...
@@ -323,8 +323,8 @@ def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model,
num_logprobs
:
int
)
->
None
:
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
inputs_per_case
:
L
ist
[
T
uple
[
inputs_per_case
:
l
ist
[
t
uple
[
L
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]]
=
[(
l
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]]
=
[(
[
prompt
for
_
in
size_factors
],
[
prompt
for
_
in
size_factors
],
[
rescale_image_size
(
image
,
factor
)
for
factor
in
size_factors
],
[
rescale_image_size
(
image
,
factor
)
for
factor
in
size_factors
],
[],
[],
...
@@ -365,7 +365,7 @@ def test_qwen2_vl_multiple_image_embeddings_input(vllm_runner, image_assets,
...
@@ -365,7 +365,7 @@ def test_qwen2_vl_multiple_image_embeddings_input(vllm_runner, image_assets,
num_logprobs
:
int
)
->
None
:
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
images
=
[
asset
.
pil_image
for
asset
in
image_assets
]
inputs_per_case
:
L
ist
[
T
uple
[
L
ist
[
str
],
PromptImageInput
,
inputs_per_case
:
l
ist
[
t
uple
[
l
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]]
=
[(
PromptVideoInput
]]
=
[(
[
MULTIIMAGE_PROMPT
for
_
in
size_factors
],
[
MULTIIMAGE_PROMPT
for
_
in
size_factors
],
[[
[[
...
@@ -413,8 +413,8 @@ def test_qwen2_vl_video_embeddings_input(vllm_runner, video_assets, model,
...
@@ -413,8 +413,8 @@ def test_qwen2_vl_video_embeddings_input(vllm_runner, video_assets, model,
for
asset
in
video_assets
for
asset
in
video_assets
]
]
inputs_per_case
:
L
ist
[
T
uple
[
inputs_per_case
:
l
ist
[
t
uple
[
L
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]]
=
[(
l
ist
[
str
],
PromptImageInput
,
PromptVideoInput
]]
=
[(
[
prompt
for
_
in
size_factors
],
[
prompt
for
_
in
size_factors
],
[],
[],
[
rescale_video_size
(
video
,
factor
)
for
factor
in
size_factors
],
[
rescale_video_size
(
video
,
factor
)
for
factor
in
size_factors
],
...
...
tests/models/decoder_only/vision_language/vlm_utils/builders.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Helpers for building inputs that can be leveraged for different test types.
"""Helpers for building inputs that can be leveraged for different test types.
"""
"""
from
collections.abc
import
Iterable
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
Callable
,
Iterable
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Callable
,
Optional
,
Union
import
torch
import
torch
...
@@ -33,7 +34,7 @@ def replace_test_placeholder(prompt: str, img_idx_to_prompt: Callable[[int],
...
@@ -33,7 +34,7 @@ def replace_test_placeholder(prompt: str, img_idx_to_prompt: Callable[[int],
def
get_model_prompts
(
base_prompts
:
Iterable
[
str
],
def
get_model_prompts
(
base_prompts
:
Iterable
[
str
],
img_idx_to_prompt
:
Optional
[
Callable
[[
int
],
str
]],
img_idx_to_prompt
:
Optional
[
Callable
[[
int
],
str
]],
video_idx_to_prompt
:
Optional
[
Callable
[[
int
],
str
]],
video_idx_to_prompt
:
Optional
[
Callable
[[
int
],
str
]],
prompt_formatter
:
Callable
[[
str
],
str
])
->
L
ist
[
str
]:
prompt_formatter
:
Callable
[[
str
],
str
])
->
l
ist
[
str
]:
"""Given a model-agnostic base prompt and test configuration for a model(s)
"""Given a model-agnostic base prompt and test configuration for a model(s)
to be tested, update the media placeholders and apply the prompt formatting
to be tested, update the media placeholders and apply the prompt formatting
to get the test prompt string for this model.
to get the test prompt string for this model.
...
@@ -218,7 +219,7 @@ def build_video_inputs_from_test_info(
...
@@ -218,7 +219,7 @@ def build_video_inputs_from_test_info(
)
for
video
,
prompt
in
zip
(
sampled_vids
,
model_prompts
)]
)
for
video
,
prompt
in
zip
(
sampled_vids
,
model_prompts
)]
def
apply_image_size_scaling
(
image
,
size
:
Union
[
float
,
T
uple
[
int
,
int
]],
def
apply_image_size_scaling
(
image
,
size
:
Union
[
float
,
t
uple
[
int
,
int
]],
size_type
:
SizeType
):
size_type
:
SizeType
):
"""Applies a size scaler to one image; this can be a an image size factor,
"""Applies a size scaler to one image; this can be a an image size factor,
which scales the image while maintaining the aspect ratio"""
which scales the image while maintaining the aspect ratio"""
...
...
tests/models/decoder_only/vision_language/vlm_utils/case_filtering.py
View file @
cf069aa8
...
@@ -5,7 +5,7 @@ handling multimodal placeholder substitution, and so on.
...
@@ -5,7 +5,7 @@ handling multimodal placeholder substitution, and so on.
"""
"""
import
itertools
import
itertools
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
typing
import
Dict
,
Iterable
,
Tuple
from
collections.abc
import
Iterable
import
pytest
import
pytest
...
@@ -13,9 +13,9 @@ from .types import (EMBEDDING_SIZE_FACTORS, ExpandableVLMTestArgs,
...
@@ -13,9 +13,9 @@ from .types import (EMBEDDING_SIZE_FACTORS, ExpandableVLMTestArgs,
ImageSizeWrapper
,
SizeType
,
VLMTestInfo
,
VLMTestType
)
ImageSizeWrapper
,
SizeType
,
VLMTestInfo
,
VLMTestType
)
def
get_filtered_test_settings
(
test_settings
:
D
ict
[
str
,
VLMTestInfo
],
def
get_filtered_test_settings
(
test_settings
:
d
ict
[
str
,
VLMTestInfo
],
test_type
:
VLMTestType
,
test_type
:
VLMTestType
,
fork_per_test
:
bool
)
->
D
ict
[
str
,
VLMTestInfo
]:
fork_per_test
:
bool
)
->
d
ict
[
str
,
VLMTestInfo
]:
"""Given the dict of potential test settings to run, return a subdict
"""Given the dict of potential test settings to run, return a subdict
of tests who have the current test type enabled with the matching val for
of tests who have the current test type enabled with the matching val for
fork_per_test.
fork_per_test.
...
@@ -49,7 +49,7 @@ def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
...
@@ -49,7 +49,7 @@ def get_filtered_test_settings(test_settings: Dict[str, VLMTestInfo],
return
matching_tests
return
matching_tests
def
get_parametrized_options
(
test_settings
:
D
ict
[
str
,
VLMTestInfo
],
def
get_parametrized_options
(
test_settings
:
d
ict
[
str
,
VLMTestInfo
],
test_type
:
VLMTestType
,
test_type
:
VLMTestType
,
fork_new_process_for_each_test
:
bool
):
fork_new_process_for_each_test
:
bool
):
"""Converts all of our VLMTestInfo into an expanded list of parameters.
"""Converts all of our VLMTestInfo into an expanded list of parameters.
...
@@ -121,7 +121,7 @@ def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],
...
@@ -121,7 +121,7 @@ def get_parametrized_options(test_settings: Dict[str, VLMTestInfo],
def
get_wrapped_test_sizes
(
def
get_wrapped_test_sizes
(
test_info
:
VLMTestInfo
,
test_info
:
VLMTestInfo
,
test_type
:
VLMTestType
)
->
T
uple
[
ImageSizeWrapper
,
...]:
test_type
:
VLMTestType
)
->
t
uple
[
ImageSizeWrapper
,
...]:
"""Given a test info which may have size factors or fixed sizes, wrap them
"""Given a test info which may have size factors or fixed sizes, wrap them
and combine them into an iterable, each of which will be used in parameter
and combine them into an iterable, each of which will be used in parameter
expansion.
expansion.
...
...
tests/models/decoder_only/vision_language/vlm_utils/core.py
View file @
cf069aa8
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""Core test implementation to be shared across modalities."""
"""Core test implementation to be shared across modalities."""
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Tuple
,
Type
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
Union
import
torch
import
torch
from
PIL.Image
import
Image
from
PIL.Image
import
Image
...
@@ -17,9 +17,9 @@ from .types import RunnerOutput
...
@@ -17,9 +17,9 @@ from .types import RunnerOutput
def
run_test
(
def
run_test
(
*
,
*
,
hf_runner
:
T
ype
[
HfRunner
],
hf_runner
:
t
ype
[
HfRunner
],
vllm_runner
:
T
ype
[
VllmRunner
],
vllm_runner
:
t
ype
[
VllmRunner
],
inputs
:
L
ist
[
T
uple
[
L
ist
[
str
],
L
ist
[
Union
[
L
ist
[
Image
],
Image
]]]],
inputs
:
l
ist
[
t
uple
[
l
ist
[
str
],
l
ist
[
Union
[
l
ist
[
Image
],
Image
]]]],
model
:
str
,
model
:
str
,
dtype
:
str
,
dtype
:
str
,
max_tokens
:
int
,
max_tokens
:
int
,
...
@@ -29,15 +29,15 @@ def run_test(
...
@@ -29,15 +29,15 @@ def run_test(
max_num_seqs
:
int
,
max_num_seqs
:
int
,
hf_output_post_proc
:
Optional
[
Callable
[[
RunnerOutput
,
str
],
Any
]],
hf_output_post_proc
:
Optional
[
Callable
[[
RunnerOutput
,
str
],
Any
]],
vllm_output_post_proc
:
Optional
[
Callable
[[
RunnerOutput
,
str
],
Any
]],
vllm_output_post_proc
:
Optional
[
Callable
[[
RunnerOutput
,
str
],
Any
]],
auto_cls
:
T
ype
[
_BaseAutoModelClass
],
auto_cls
:
t
ype
[
_BaseAutoModelClass
],
use_tokenizer_eos
:
bool
,
use_tokenizer_eos
:
bool
,
postprocess_inputs
:
Callable
[[
BatchEncoding
],
BatchEncoding
],
postprocess_inputs
:
Callable
[[
BatchEncoding
],
BatchEncoding
],
comparator
:
Callable
[...,
None
],
comparator
:
Callable
[...,
None
],
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]],
get_stop_token_ids
:
Optional
[
Callable
[[
AnyTokenizer
],
list
[
int
]]],
stop_str
:
Optional
[
L
ist
[
str
]],
stop_str
:
Optional
[
l
ist
[
str
]],
limit_mm_per_prompt
:
D
ict
[
str
,
int
],
limit_mm_per_prompt
:
d
ict
[
str
,
int
],
vllm_runner_kwargs
:
Optional
[
D
ict
[
str
,
Any
]],
vllm_runner_kwargs
:
Optional
[
d
ict
[
str
,
Any
]],
hf_model_kwargs
:
Optional
[
D
ict
[
str
,
Any
]],
hf_model_kwargs
:
Optional
[
d
ict
[
str
,
Any
]],
patch_hf_runner
:
Optional
[
Callable
[[
HfRunner
],
HfRunner
]],
patch_hf_runner
:
Optional
[
Callable
[[
HfRunner
],
HfRunner
]],
task
:
TaskOption
=
"auto"
,
task
:
TaskOption
=
"auto"
,
runner_mm_key
:
str
=
"images"
,
runner_mm_key
:
str
=
"images"
,
...
@@ -61,7 +61,7 @@ def run_test(
...
@@ -61,7 +61,7 @@ def run_test(
# if we run HF first, the cuda initialization will be done and it
# if we run HF first, the cuda initialization will be done and it
# will hurt multiprocessing backend with fork method (the default method).
# will hurt multiprocessing backend with fork method (the default method).
vllm_runner_kwargs_
:
D
ict
[
str
,
Any
]
=
{}
vllm_runner_kwargs_
:
d
ict
[
str
,
Any
]
=
{}
if
model_info
.
tokenizer
:
if
model_info
.
tokenizer
:
vllm_runner_kwargs_
[
"tokenizer"
]
=
model_info
.
tokenizer
vllm_runner_kwargs_
[
"tokenizer"
]
=
model_info
.
tokenizer
if
model_info
.
tokenizer_mode
:
if
model_info
.
tokenizer_mode
:
...
@@ -84,7 +84,7 @@ def run_test(
...
@@ -84,7 +84,7 @@ def run_test(
**
vllm_runner_kwargs_
)
as
vllm_model
:
**
vllm_runner_kwargs_
)
as
vllm_model
:
tokenizer
=
vllm_model
.
model
.
get_tokenizer
()
tokenizer
=
vllm_model
.
model
.
get_tokenizer
()
vllm_kwargs
:
D
ict
[
str
,
Any
]
=
{}
vllm_kwargs
:
d
ict
[
str
,
Any
]
=
{}
if
get_stop_token_ids
is
not
None
:
if
get_stop_token_ids
is
not
None
:
vllm_kwargs
[
"stop_token_ids"
]
=
get_stop_token_ids
(
tokenizer
)
vllm_kwargs
[
"stop_token_ids"
]
=
get_stop_token_ids
(
tokenizer
)
if
stop_str
:
if
stop_str
:
...
...
tests/models/decoder_only/vision_language/vlm_utils/model_utils.py
View file @
cf069aa8
...
@@ -6,7 +6,7 @@ typically specific to a small subset of models.
...
@@ -6,7 +6,7 @@ typically specific to a small subset of models.
import
re
import
re
import
types
import
types
from
pathlib
import
PosixPath
from
pathlib
import
PosixPath
from
typing
import
Callable
,
List
,
Optional
,
Tuple
,
Union
from
typing
import
Callable
,
Optional
,
Union
import
torch
import
torch
from
PIL.Image
import
Image
from
PIL.Image
import
Image
...
@@ -49,7 +49,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
...
@@ -49,7 +49,7 @@ def fuyu_vllm_to_hf_output(vllm_output: RunnerOutput,
def
qwen_vllm_to_hf_output
(
def
qwen_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
"""Sanitize vllm output [qwen models] to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
output_ids
,
output_str
,
out_logprobs
=
vllm_output
...
@@ -60,7 +60,7 @@ def qwen_vllm_to_hf_output(
...
@@ -60,7 +60,7 @@ def qwen_vllm_to_hf_output(
def
qwen2_vllm_to_hf_output
(
def
qwen2_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
"""Sanitize vllm output [qwen2 models] to be comparable with hf output."""
output_ids
,
output_str
,
out_logprobs
=
vllm_output
output_ids
,
output_str
,
out_logprobs
=
vllm_output
...
@@ -78,7 +78,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
...
@@ -78,7 +78,7 @@ def llava_image_vllm_to_hf_output(vllm_output: RunnerOutput,
def
llava_video_vllm_to_hf_output
(
def
llava_video_vllm_to_hf_output
(
vllm_output
:
RunnerOutput
,
vllm_output
:
RunnerOutput
,
model
:
str
)
->
T
uple
[
L
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
model
:
str
)
->
t
uple
[
l
ist
[
int
],
str
,
Optional
[
SampleLogprobs
]]:
config
=
AutoConfig
.
from_pretrained
(
model
)
config
=
AutoConfig
.
from_pretrained
(
model
)
mm_token_id
=
config
.
video_token_index
mm_token_id
=
config
.
video_token_index
return
_llava_vllm_to_hf_output
(
vllm_output
,
model
,
mm_token_id
)
return
_llava_vllm_to_hf_output
(
vllm_output
,
model
,
mm_token_id
)
...
@@ -247,7 +247,7 @@ def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):
...
@@ -247,7 +247,7 @@ def molmo_post_processor(hf_inputs: BatchEncoding, dtype: str):
####### Prompt path encoders for models that need models on disk
####### Prompt path encoders for models that need models on disk
def
qwen_prompt_path_encoder
(
def
qwen_prompt_path_encoder
(
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
L
ist
[
ImageAsset
],
tmp_path
:
PosixPath
,
prompt
:
str
,
assets
:
Union
[
l
ist
[
ImageAsset
],
_ImageAssets
])
->
str
:
_ImageAssets
])
->
str
:
"""Given a temporary dir path, export one or more image assets into the
"""Given a temporary dir path, export one or more image assets into the
tempdir & replace its contents with the local path to the string so that
tempdir & replace its contents with the local path to the string so that
...
@@ -257,7 +257,7 @@ def qwen_prompt_path_encoder(
...
@@ -257,7 +257,7 @@ def qwen_prompt_path_encoder(
Args:
Args:
tmp_path: Tempdir for test under consideration.
tmp_path: Tempdir for test under consideration.
prompt: Prompt with image placeholders.
prompt: Prompt with image placeholders.
assets:
L
ist of image assets whose len equals the num placeholders.
assets:
l
ist of image assets whose len equals the num placeholders.
"""
"""
# Ensure that the number of placeholders matches the number of assets;
# Ensure that the number of placeholders matches the number of assets;
# If this is not true, the test is probably written incorrectly.
# If this is not true, the test is probably written incorrectly.
...
@@ -350,7 +350,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -350,7 +350,7 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
self
.
image_size
=
self
.
vision_config
.
image_size
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
L
ist
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
l
ist
[
Image
]],
**
kwargs
):
**
kwargs
):
# yapf: disable
# yapf: disable
from
vllm.model_executor.models.h2ovl
import
(
from
vllm.model_executor.models.h2ovl
import
(
...
@@ -410,7 +410,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
...
@@ -410,7 +410,7 @@ def internvl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
max_num
=
self
.
config
.
max_dynamic_patch
self
.
image_size
=
self
.
vision_config
.
image_size
self
.
image_size
=
self
.
vision_config
.
image_size
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
L
ist
[
Image
]],
def
__call__
(
self
,
text
:
str
,
images
:
Union
[
Image
,
l
ist
[
Image
]],
**
kwargs
):
**
kwargs
):
from
vllm.model_executor.models.internvl
import
(
from
vllm.model_executor.models.internvl
import
(
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
IMG_CONTEXT
,
IMG_END
,
IMG_START
,
...
...
Prev
1
…
3
4
5
6
7
8
9
10
11
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment