# test_mapper.py - compares vLLM's image input mapping with Hugging Face image processors.
1
2
import numpy as np
import pytest
3
from transformers import CLIPImageProcessor, LlavaNextImageProcessor
4

5
from vllm.config import ModelConfig
6
from vllm.multimodal import MULTIMODAL_REGISTRY
7
from vllm.multimodal.utils import rescale_image_size
8

9
10

@pytest.mark.parametrize("dtype", ["half", "float"])
@pytest.mark.parametrize("size_factor", [0.25, 0.5, 1.0])
def test_clip_image_processor(image_assets, dtype, size_factor):
    """Compare vLLM's image input mapping with HF ``CLIPImageProcessor``.

    For every asset, the image is rescaled by ``size_factor`` and then
    processed twice: once with the Hugging Face processor loaded from the
    checkpoint and once through ``MULTIMODAL_REGISTRY.map_input``. Both
    results must expose the same keys, and each tensor must match in shape
    and (approximately) in value.

    Args:
        image_assets: Iterable of assets, each exposing a ``pil_image``
            attribute (presumably a pytest fixture defined outside this
            file -- confirm against the test suite's conftest).
        dtype: Model dtype string forwarded to ``ModelConfig``.
        size_factor: Scale factor passed to ``rescale_image_size``.
    """
    MODEL_NAME = "llava-hf/llava-1.5-7b-hf"

    hf_processor = CLIPImageProcessor.from_pretrained(MODEL_NAME)
    # Guard against the checkpoint resolving to a different processor class,
    # which would make the comparison below meaningless.
    assert isinstance(hf_processor, CLIPImageProcessor)

    model_config = ModelConfig(
        model=MODEL_NAME,
        tokenizer=MODEL_NAME,
        tokenizer_mode="auto",
        trust_remote_code=False,
        seed=0,
        dtype=dtype,
        revision=None,
    )

    for asset in image_assets:
        image = rescale_image_size(asset.pil_image, size_factor)

        # Reference output straight from the Hugging Face processor.
        hf_result = hf_processor.preprocess(
            image,
            return_tensors="pt",
        )
        # Output of vLLM's registry for the same image and model config.
        vllm_result = MULTIMODAL_REGISTRY.map_input(
            model_config,
            {"image": image},
        )

        assert hf_result.keys() == vllm_result.keys()
        for key, hf_tensor in hf_result.items():
            hf_arr: np.ndarray = hf_tensor.numpy()
            vllm_arr: np.ndarray = vllm_result[key].numpy()

            # Check shape first so a mismatch is reported as such instead of
            # surfacing as a broadcasting error inside np.allclose.
            assert hf_arr.shape == vllm_arr.shape, f"Failed for key={key}"
            assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"


49
@pytest.mark.parametrize("dtype", ["half", "float"])
@pytest.mark.parametrize("size_factor", [0.25, 0.5, 1.0])
def test_llava_next_image_processor(image_assets, dtype, size_factor):
    """Compare vLLM's image input mapping with HF ``LlavaNextImageProcessor``.

    For every asset, the image is rescaled by ``size_factor`` and then
    processed twice: once with the Hugging Face processor loaded from the
    checkpoint and once through ``MULTIMODAL_REGISTRY.map_input``. Both
    results must expose the same keys, and each tensor must match in shape
    and (approximately) in value.

    Args:
        image_assets: Iterable of assets, each exposing a ``pil_image``
            attribute (presumably a pytest fixture defined outside this
            file -- confirm against the test suite's conftest).
        dtype: Model dtype string forwarded to ``ModelConfig``.
        size_factor: Scale factor passed to ``rescale_image_size``.
    """
    MODEL_NAME = "llava-hf/llava-v1.6-vicuna-7b-hf"

    hf_processor = LlavaNextImageProcessor.from_pretrained(MODEL_NAME)
    # Guard against the checkpoint resolving to a different processor class,
    # which would make the comparison below meaningless.
    assert isinstance(hf_processor, LlavaNextImageProcessor)

    model_config = ModelConfig(
        model=MODEL_NAME,
        tokenizer=MODEL_NAME,
        tokenizer_mode="auto",
        trust_remote_code=False,
        seed=0,
        dtype=dtype,
        revision=None,
    )

    for asset in image_assets:
        image = rescale_image_size(asset.pil_image, size_factor)

        # Reference output straight from the Hugging Face processor.
        hf_result = hf_processor.preprocess(
            image,
            return_tensors="pt",
        )
        # Output of vLLM's registry for the same image and model config.
        vllm_result = MULTIMODAL_REGISTRY.map_input(
            model_config,
            {"image": image},
        )

        assert hf_result.keys() == vllm_result.keys()
        for key, hf_tensor in hf_result.items():
            hf_arr: np.ndarray = hf_tensor.numpy()
            vllm_arr: np.ndarray = vllm_result[key].numpy()

            # Check shape first so a mismatch is reported as such instead of
            # surfacing as a broadcasting error inside np.allclose.
            assert hf_arr.shape == vllm_arr.shape, f"Failed for key={key}"
            assert np.allclose(hf_arr, vllm_arr), f"Failed for key={key}"