Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ad58e9b3
Commit
ad58e9b3
authored
Sep 18, 2024
by
zhuwenwen
Browse files
Merge tag 'v0.6.1.post2' into v0.6.1.post2-dev
parents
408f663a
9ba0817f
Changes
118
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
111 additions
and
46 deletions
+111
-46
tests/models/decoder_only/language/test_fp8.py
tests/models/decoder_only/language/test_fp8.py
+1
-1
tests/models/decoder_only/language/test_gguf.py
tests/models/decoder_only/language/test_gguf.py
+1
-1
tests/models/decoder_only/language/test_gptq_marlin.py
tests/models/decoder_only/language/test_gptq_marlin.py
+1
-1
tests/models/decoder_only/language/test_gptq_marlin_24.py
tests/models/decoder_only/language/test_gptq_marlin_24.py
+2
-1
tests/models/decoder_only/language/test_granite.py
tests/models/decoder_only/language/test_granite.py
+1
-1
tests/models/decoder_only/language/test_jamba.py
tests/models/decoder_only/language/test_jamba.py
+2
-1
tests/models/decoder_only/language/test_marlin.py
tests/models/decoder_only/language/test_marlin.py
+1
-1
tests/models/decoder_only/language/test_mistral.py
tests/models/decoder_only/language/test_mistral.py
+1
-1
tests/models/decoder_only/language/test_modelopt.py
tests/models/decoder_only/language/test_modelopt.py
+0
-0
tests/models/decoder_only/language/test_models.py
tests/models/decoder_only/language/test_models.py
+1
-1
tests/models/decoder_only/language/test_phimoe.py
tests/models/decoder_only/language/test_phimoe.py
+1
-1
tests/models/decoder_only/vision_language/__init__.py
tests/models/decoder_only/vision_language/__init__.py
+0
-0
tests/models/decoder_only/vision_language/test_blip2.py
tests/models/decoder_only/vision_language/test_blip2.py
+3
-5
tests/models/decoder_only/vision_language/test_broadcast.py
tests/models/decoder_only/vision_language/test_broadcast.py
+42
-0
tests/models/decoder_only/vision_language/test_chameleon.py
tests/models/decoder_only/vision_language/test_chameleon.py
+3
-5
tests/models/decoder_only/vision_language/test_fuyu.py
tests/models/decoder_only/vision_language/test_fuyu.py
+3
-5
tests/models/decoder_only/vision_language/test_intern_vit.py
tests/models/decoder_only/vision_language/test_intern_vit.py
+1
-3
tests/models/decoder_only/vision_language/test_internvl.py
tests/models/decoder_only/vision_language/test_internvl.py
+39
-6
tests/models/decoder_only/vision_language/test_llava.py
tests/models/decoder_only/vision_language/test_llava.py
+5
-7
tests/models/decoder_only/vision_language/test_llava_image_embeds.py
...s/decoder_only/vision_language/test_llava_image_embeds.py
+3
-5
No files found.
tests/models/test_fp8.py
→
tests/models/
decoder_only/language/
test_fp8.py
View file @
ad58e9b3
...
@@ -10,7 +10,7 @@ import pytest
...
@@ -10,7 +10,7 @@ import pytest
from
tests.kernels.utils
import
override_backend_env_variable
from
tests.kernels.utils
import
override_backend_env_variable
from
tests.quantization.utils
import
is_quant_method_supported
from
tests.quantization.utils
import
is_quant_method_supported
from
..
models
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
...
...
tests/models/test_gguf.py
→
tests/models/
decoder_only/language/
test_gguf.py
View file @
ad58e9b3
...
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer
...
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer
from
tests.quantization.utils
import
is_quant_method_supported
from
tests.quantization.utils
import
is_quant_method_supported
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
...
...
tests/models/test_gptq_marlin.py
→
tests/models/
decoder_only/language/
test_gptq_marlin.py
View file @
ad58e9b3
...
@@ -15,7 +15,7 @@ import pytest
...
@@ -15,7 +15,7 @@ import pytest
from
tests.quantization.utils
import
is_quant_method_supported
from
tests.quantization.utils
import
is_quant_method_supported
from
vllm.model_executor.layers.rotary_embedding
import
_ROPE_DICT
from
vllm.model_executor.layers.rotary_embedding
import
_ROPE_DICT
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
os
.
environ
[
"TOKENIZERS_PARALLELISM"
]
=
"true"
...
...
tests/models/test_gptq_marlin_24.py
→
tests/models/
decoder_only/language/
test_gptq_marlin_24.py
View file @
ad58e9b3
...
@@ -10,9 +10,10 @@ from dataclasses import dataclass
...
@@ -10,9 +10,10 @@ from dataclasses import dataclass
import
pytest
import
pytest
from
tests.models.utils
import
check_logprobs_close
from
tests.quantization.utils
import
is_quant_method_supported
from
tests.quantization.utils
import
is_quant_method_supported
from
...utils
import
check_logprobs_close
@
dataclass
@
dataclass
class
ModelPair
:
class
ModelPair
:
...
...
tests/models/test_granite.py
→
tests/models/
decoder_only/language/
test_granite.py
View file @
ad58e9b3
...
@@ -6,7 +6,7 @@ import importlib.metadata
...
@@ -6,7 +6,7 @@ import importlib.metadata
import
pytest
import
pytest
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
TRANSFORMERS_VERSION
=
tuple
(
TRANSFORMERS_VERSION
=
tuple
(
map
(
int
,
map
(
int
,
...
...
tests/models/test_jamba.py
→
tests/models/
decoder_only/language/
test_jamba.py
View file @
ad58e9b3
import
pytest
import
pytest
from
tests.models.utils
import
check_outputs_equal
from
vllm.worker.model_runner
import
_get_graph_batch_size
from
vllm.worker.model_runner
import
_get_graph_batch_size
from
...utils
import
check_outputs_equal
MODELS
=
[
"ai21labs/Jamba-tiny-random"
]
MODELS
=
[
"ai21labs/Jamba-tiny-random"
]
...
...
tests/models/test_marlin.py
→
tests/models/
decoder_only/language/
test_marlin.py
View file @
ad58e9b3
...
@@ -16,7 +16,7 @@ import pytest
...
@@ -16,7 +16,7 @@ import pytest
from
tests.quantization.utils
import
is_quant_method_supported
from
tests.quantization.utils
import
is_quant_method_supported
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
@
dataclass
@
dataclass
...
...
tests/models/test_mistral.py
→
tests/models/
decoder_only/language/
test_mistral.py
View file @
ad58e9b3
...
@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`.
...
@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`.
"""
"""
import
pytest
import
pytest
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
MODELS
=
[
MODELS
=
[
"mistralai/Mistral-7B-Instruct-v0.1"
,
"mistralai/Mistral-7B-Instruct-v0.1"
,
...
...
tests/models/test_modelopt.py
→
tests/models/
decoder_only/language/
test_modelopt.py
View file @
ad58e9b3
File moved
tests/models/test_models.py
→
tests/models/
decoder_only/language/
test_models.py
View file @
ad58e9b3
...
@@ -7,7 +7,7 @@ Run `pytest tests/models/test_models.py`.
...
@@ -7,7 +7,7 @@ Run `pytest tests/models/test_models.py`.
"""
"""
import
pytest
import
pytest
from
.utils
import
check_outputs_equal
from
..
.utils
import
check_outputs_equal
MODELS
=
[
MODELS
=
[
"facebook/opt-125m"
,
"facebook/opt-125m"
,
...
...
tests/models/test_phimoe.py
→
tests/models/
decoder_only/language/
test_phimoe.py
View file @
ad58e9b3
...
@@ -7,7 +7,7 @@ import torch
...
@@ -7,7 +7,7 @@ import torch
from
vllm.utils
import
is_cpu
from
vllm.utils
import
is_cpu
from
.utils
import
check_logprobs_close
from
..
.utils
import
check_logprobs_close
MODELS
=
[
MODELS
=
[
"microsoft/Phi-3.5-MoE-instruct"
,
"microsoft/Phi-3.5-MoE-instruct"
,
...
...
tests/models/decoder_only/vision_language/__init__.py
0 → 100644
View file @
ad58e9b3
tests/models/test_blip2.py
→
tests/models/
decoder_only/vision_language/
test_blip2.py
View file @
ad58e9b3
...
@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, AutoTokenizer
...
@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, AutoTokenizer
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
..conftest
import
IMAGE_ASSETS
from
....conftest
import
IMAGE_ASSETS
from
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"stop_sign"
:
...
@@ -56,7 +54,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
...
@@ -56,7 +54,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
"""Inference result should be the same between hf and vllm.
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalData objects and corresponding
For vllm runner, we provide MultiModalData objects and corresponding
MultiModalConfig as input.
MultiModalConfig as input.
...
...
tests/
distributed/test_multimodal
_broadcast.py
→
tests/
models/decoder_only/vision_language/test
_broadcast.py
View file @
ad58e9b3
"""Compare the outputs of HF and distributed vLLM when using greedy sampling.
Run:
```sh
pytest -s -v test_multimodal_broadcast.py
```
"""
import
pytest
import
pytest
from
vllm.utils
import
cuda_device_count_stateless
from
....utils
import
multi_gpu_test
from
..utils
import
fork_new_process_for_each_test
@
pytest
.
mark
.
skipif
(
cuda_device_count_stateless
()
<
2
,
@
multi_gpu_test
(
num_gpus
=
2
)
reason
=
"Need at least 2 GPUs to run the test."
)
@
pytest
.
mark
.
parametrize
(
"distributed_executor_backend"
,
[
"ray"
,
"mp"
])
@
pytest
.
mark
.
parametrize
(
"model, distributed_executor_backend"
,
[
@
pytest
.
mark
.
parametrize
(
"model"
,
[
(
"llava-hf/llava-1.5-7b-hf"
,
"ray"
),
"llava-hf/llava-1.5-7b-hf"
,
(
"llava-hf/llava-v1.6-mistral-7b-hf"
,
"ray"
),
"llava-hf/llava-v1.6-mistral-7b-hf"
,
(
"facebook/chameleon-7b"
,
"ray"
),
"facebook/chameleon-7b"
,
(
"llava-hf/llava-1.5-7b-hf"
,
"mp"
),
(
"llava-hf/llava-v1.6-mistral-7b-hf"
,
"mp"
),
(
"facebook/chameleon-7b"
,
"mp"
),
])
])
@
fork_new_process_for_each_test
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
model
:
str
,
distributed_executor_backend
,
model
)
->
None
:
distributed_executor_backend
:
str
)
->
None
:
dtype
=
"half"
dtype
=
"half"
max_tokens
=
5
max_tokens
=
5
...
@@ -33,13 +19,11 @@ def test_models(hf_runner, vllm_runner, image_assets, model: str,
...
@@ -33,13 +19,11 @@ def test_models(hf_runner, vllm_runner, image_assets, model: str,
tensor_parallel_size
=
2
tensor_parallel_size
=
2
if
model
.
startswith
(
"llava-hf/llava-1.5"
):
if
model
.
startswith
(
"llava-hf/llava-1.5"
):
from
..models
.test_llava
import
models
,
run_test
from
.test_llava
import
models
,
run_test
elif
model
.
startswith
(
"llava-hf/llava-v1.6"
):
elif
model
.
startswith
(
"llava-hf/llava-v1.6"
):
from
..models.test_llava_next
import
run_test
# type: ignore[no-redef]
from
.test_llava_next
import
models
,
run_test
# type: ignore[no-redef]
from
..models.test_llava_next
import
models
elif
model
.
startswith
(
"facebook/chameleon"
):
elif
model
.
startswith
(
"facebook/chameleon"
):
from
..models.test_chameleon
import
run_test
# type: ignore[no-redef]
from
.test_chameleon
import
models
,
run_test
# type: ignore[no-redef]
from
..models.test_chameleon
import
models
else
:
else
:
raise
NotImplementedError
(
f
"Unsupported model:
{
model
}
"
)
raise
NotImplementedError
(
f
"Unsupported model:
{
model
}
"
)
...
...
tests/models/test_chameleon.py
→
tests/models/
decoder_only/vision_language/
test_chameleon.py
View file @
ad58e9b3
...
@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, BatchEncoding
...
@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, BatchEncoding
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_outputs_equal
from
...utils
import
check_outputs_equal
pytestmark
=
pytest
.
mark
.
vlm
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"stop_sign"
:
...
@@ -36,7 +34,7 @@ def run_test(
...
@@ -36,7 +34,7 @@ def run_test(
):
):
"""Inference result should be the same between hf and vllm.
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input.
and corresponding vision language config as input.
...
...
tests/models/test_fuyu.py
→
tests/models/
decoder_only/vision_language/
test_fuyu.py
View file @
ad58e9b3
...
@@ -6,10 +6,8 @@ from vllm.multimodal.utils import rescale_image_size
...
@@ -6,10 +6,8 @@ from vllm.multimodal.utils import rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.utils
import
is_cpu
from
vllm.utils
import
is_cpu
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"stop_sign"
:
...
@@ -46,7 +44,7 @@ def run_test(
...
@@ -46,7 +44,7 @@ def run_test(
):
):
"""Inference result should be the same between hf and vllm.
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
and corresponding MultiModalConfig as input.
...
...
tests/models/test_intern_vit.py
→
tests/models/
decoder_only/vision_language/
test_intern_vit.py
View file @
ad58e9b3
...
@@ -6,9 +6,7 @@ import torch.nn as nn
...
@@ -6,9 +6,7 @@ import torch.nn as nn
from
huggingface_hub
import
snapshot_download
from
huggingface_hub
import
snapshot_download
from
transformers
import
AutoConfig
,
AutoModel
,
CLIPImageProcessor
from
transformers
import
AutoConfig
,
AutoModel
,
CLIPImageProcessor
from
..conftest
import
_ImageAssets
,
cleanup
from
....conftest
import
_ImageAssets
,
cleanup
pytestmark
=
pytest
.
mark
.
vlm
# we use snapshot_download to prevent conflicts between
# we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner
# dynamic_module and trust_remote_code for hf_runner
...
...
tests/models/test_internvl.py
→
tests/models/
decoder_only/vision_language/
test_internvl.py
View file @
ad58e9b3
...
@@ -9,11 +9,9 @@ from transformers import AutoConfig
...
@@ -9,11 +9,9 @@ from transformers import AutoConfig
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.multimodal.utils
import
rescale_image_size
from
vllm.utils
import
is_cpu
from
vllm.utils
import
is_cpu
from
..conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
_ImageAssets
)
from
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"stop_sign"
:
...
@@ -78,7 +76,7 @@ def run_test(
...
@@ -78,7 +76,7 @@ def run_test(
):
):
"""Inference result should be the same between hf and vllm.
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
and corresponding MultiModalConfig as input.
...
@@ -331,6 +329,41 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
...
@@ -331,6 +329,41 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
)
)
@
pytest
.
mark
.
parametrize
(
"model"
,
[
"OpenGVLab/InternVL2-2B"
])
@
pytest
.
mark
.
parametrize
(
"size_factors"
,
[[
0.5
,
1.0
]])
@
pytest
.
mark
.
parametrize
(
"dtype"
,
[
target_dtype
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
torch
.
inference_mode
()
def
test_different_num_patches
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
size_factors
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
images
=
[
asset
.
pil_image
.
resize
((
896
,
896
))
for
asset
in
image_assets
]
inputs_batching
=
[(
[
prompt
for
_
in
size_factors
],
[
rescale_image_size
(
image
,
factor
)
for
factor
in
size_factors
],
)
for
image
,
prompt
in
zip
(
images
,
HF_IMAGE_PROMPTS
)]
inputs_multi_images
=
[
([
HF_MULTIIMAGE_IMAGE_PROMPT
for
_
in
size_factors
],
[[
rescale_image_size
(
image
,
factor
)
for
image
in
images
]
for
factor
in
size_factors
])
]
for
inputs
in
[
inputs_batching
,
inputs_multi_images
]:
run_test
(
hf_runner
,
vllm_runner
,
inputs
,
model
,
dtype
=
dtype
,
max_tokens
=
max_tokens
,
num_logprobs
=
num_logprobs
,
mm_limit
=
2
,
tensor_parallel_size
=
1
,
)
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"models"
,
[(
"OpenGVLab/InternVL2-2B"
,
"OpenGVLab/InternVL2-2B-AWQ"
)])
"models"
,
[(
"OpenGVLab/InternVL2-2B"
,
"OpenGVLab/InternVL2-2B-AWQ"
)])
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
...
...
tests/models/test_llava.py
→
tests/models/
decoder_only/vision_language/
test_llava.py
View file @
ad58e9b3
...
@@ -8,11 +8,9 @@ from vllm.multimodal.utils import rescale_image_size
...
@@ -8,11 +8,9 @@ from vllm.multimodal.utils import rescale_image_size
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
vllm.utils
import
STR_DTYPE_TO_TORCH_DTYPE
from
..conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
from
....conftest
import
(
IMAGE_ASSETS
,
HfRunner
,
PromptImageInput
,
VllmRunner
,
_ImageAssets
)
_ImageAssets
)
from
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
_LIMIT_IMAGE_PER_PROMPT
=
4
_LIMIT_IMAGE_PER_PROMPT
=
4
...
@@ -143,7 +141,7 @@ def _run_test(
...
@@ -143,7 +141,7 @@ def _run_test(
):
):
"""Inference result should be the same between hf and vllm.
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
and corresponding MultiModalConfig as input.
...
@@ -239,7 +237,7 @@ def _run_test(
...
@@ -239,7 +237,7 @@ def _run_test(
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"max_tokens"
,
[
128
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
@
pytest
.
mark
.
parametrize
(
"num_logprobs"
,
[
5
])
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
size_factors
,
def
test_models
(
hf_runner
,
vllm_runner
,
image_assets
,
model
,
size_factors
,
dtype
:
str
,
max_tokens
:
int
,
num_logprobs
:
int
)
->
None
:
dtype
,
max_tokens
,
num_logprobs
)
->
None
:
run_test
(
run_test
(
hf_runner
,
hf_runner
,
vllm_runner
,
vllm_runner
,
...
...
tests/models/test_llava_image_embeds.py
→
tests/models/
decoder_only/vision_language/
test_llava_image_embeds.py
View file @
ad58e9b3
...
@@ -5,10 +5,8 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
...
@@ -5,10 +5,8 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
from
vllm.sequence
import
SampleLogprobs
from
vllm.sequence
import
SampleLogprobs
from
..conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
....conftest
import
IMAGE_ASSETS
,
HfRunner
,
VllmRunner
,
_ImageAssets
from
.utils
import
check_logprobs_close
from
...utils
import
check_logprobs_close
pytestmark
=
pytest
.
mark
.
vlm
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
HF_IMAGE_PROMPTS
=
IMAGE_ASSETS
.
prompts
({
"stop_sign"
:
"stop_sign"
:
...
@@ -62,7 +60,7 @@ def run_test(
...
@@ -62,7 +60,7 @@ def run_test(
):
):
"""Inference result should be the same between hf and vllm.
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test
is under tests/images
.
All the image fixtures for the test
are from IMAGE_ASSETS
.
For huggingface runner, we provide the PIL images as input.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input.
and corresponding vision language config as input.
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment