[Bugfix] Fix Phi-3v crash when input images are of certain sizes (#7840)

80162c44 · zifeitong · GitHub · aab0fcdb · 80162c44 · 80162c44
Unverified commit 80162c44, authored Aug 24, 2024 by zifeitong; committed by GitHub on Aug 24, 2024
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 22 additions and 7 deletions

tests/models/test_phi3v.py +22 -5

vllm/model_executor/models/phi3v.py +0 -2

No files found.
--- a/tests/models/test_phi3v.py
+++ b/tests/models/test_phi3v.py
@@ -3,13 +3,14 @@ import re
 from typing import List, Optional, Tuple, Type
 import pytest
+from PIL import Image
 from transformers import AutoTokenizer
 from vllm.multimodal.utils import rescale_image_size
 from vllm.sequence import SampleLogprobs
 from vllm.utils import is_cpu, is_hip
-from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
+from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner
 from .utils import check_logprobs_close
 pytestmark = pytest.mark.vlm
@@ -58,7 +59,7 @@ if is_hip():
 def run_test(
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
-    image_assets: _ImageAssets,
+    images: List[Image.Image],
    model: str,
    *,
    size_factors: List[float],
@@ -77,8 +78,6 @@ def run_test(
    Note, the text input is also adjusted to abide by vllm contract.
    The text output is sanitized to be able to compare with hf.
    """
-    images = [asset.pil_image for asset in image_assets]
    inputs_per_image = [(
        [prompt for _ in size_factors],
        [
@@ -159,7 +158,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
    run_test(
        hf_runner,
        vllm_runner,
-        image_assets,
+        [asset.pil_image for asset in image_assets],
        model,
        size_factors=size_factors,
        dtype=dtype,
@@ -167,3 +166,21 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
        num_logprobs=num_logprobs,
        tensor_parallel_size=1,
    )
+@pytest.mark.parametrize("model", models)
+@pytest.mark.parametrize("dtype", [target_dtype])
+def test_regression_7840(hf_runner, vllm_runner, image_assets, model,
+                         dtype) -> None:
+    # Regression test for #7840.
+    run_test(
+        hf_runner,
+        vllm_runner,
+        [image_assets[0].pil_image.resize((465, 226))],
+        model,
+        size_factors=[1.0],
+        dtype=dtype,
+        max_tokens=128,
+        num_logprobs=10,
+        tensor_parallel_size=1,
+    )
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -400,8 +400,6 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs):
    image_data = multi_modal_data["image"]
    if isinstance(image_data, Image.Image):
        w, h = image_data.size
-        w, h = _calc_hd_transform_size(width=w, height=h)
        image_feature_size = get_phi3v_image_feature_size(hf_config,
                                                          input_width=w,
                                                          input_height=h)