Unverified commit 80162c44, authored by zifeitong and committed by GitHub
Browse files

[Bugfix] Fix Phi-3v crash when input images are of certain sizes (#7840)

parent aab0fcdb
...@@ -3,13 +3,14 @@ import re ...@@ -3,13 +3,14 @@ import re
from typing import List, Optional, Tuple, Type from typing import List, Optional, Tuple, Type
import pytest import pytest
from PIL import Image
from transformers import AutoTokenizer from transformers import AutoTokenizer
from vllm.multimodal.utils import rescale_image_size from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs from vllm.sequence import SampleLogprobs
from vllm.utils import is_cpu, is_hip from vllm.utils import is_cpu, is_hip
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner
from .utils import check_logprobs_close from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm pytestmark = pytest.mark.vlm
...@@ -58,7 +59,7 @@ if is_hip(): ...@@ -58,7 +59,7 @@ if is_hip():
def run_test( def run_test(
hf_runner: Type[HfRunner], hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: Type[VllmRunner],
image_assets: _ImageAssets, images: List[Image.Image],
model: str, model: str,
*, *,
size_factors: List[float], size_factors: List[float],
...@@ -77,8 +78,6 @@ def run_test( ...@@ -77,8 +78,6 @@ def run_test(
Note, the text input is also adjusted to abide by vllm contract. Note, the text input is also adjusted to abide by vllm contract.
The text output is sanitized to be able to compare with hf. The text output is sanitized to be able to compare with hf.
""" """
images = [asset.pil_image for asset in image_assets]
inputs_per_image = [( inputs_per_image = [(
[prompt for _ in size_factors], [prompt for _ in size_factors],
[ [
...@@ -159,7 +158,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors, ...@@ -159,7 +158,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
run_test( run_test(
hf_runner, hf_runner,
vllm_runner, vllm_runner,
image_assets, [asset.pil_image for asset in image_assets],
model, model,
size_factors=size_factors, size_factors=size_factors,
dtype=dtype, dtype=dtype,
...@@ -167,3 +166,21 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors, ...@@ -167,3 +166,21 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
num_logprobs=num_logprobs, num_logprobs=num_logprobs,
tensor_parallel_size=1, tensor_parallel_size=1,
) )
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", [target_dtype])
def test_regression_7840(hf_runner, vllm_runner, image_assets, model,
                         dtype) -> None:
    """Regression test for #7840.

    Passes ``run_test`` a single image: the first image asset resized to
    465x226, with a single size factor of 1.0.
    """
    # Image dimensions this regression test pins for #7840.
    regression_size = (465, 226)
    resized_image = image_assets[0].pil_image.resize(regression_size)

    run_test(
        hf_runner,
        vllm_runner,
        [resized_image],
        model,
        size_factors=[1.0],
        dtype=dtype,
        max_tokens=128,
        num_logprobs=10,
        tensor_parallel_size=1,
    )
...@@ -400,8 +400,6 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs): ...@@ -400,8 +400,6 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs):
image_data = multi_modal_data["image"] image_data = multi_modal_data["image"]
if isinstance(image_data, Image.Image): if isinstance(image_data, Image.Image):
w, h = image_data.size w, h = image_data.size
w, h = _calc_hd_transform_size(width=w, height=h)
image_feature_size = get_phi3v_image_feature_size(hf_config, image_feature_size = get_phi3v_image_feature_size(hf_config,
input_width=w, input_width=w,
input_height=h) input_height=h)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment