# test_vision_openai_server.py
import base64
import io
Ying Sheng's avatar
Ying Sheng committed
3
import json
4
5
6
import os
import sys
import time
Ying Sheng's avatar
Ying Sheng committed
7
8
import unittest

9
import numpy as np
Ying Sheng's avatar
Ying Sheng committed
10
import openai
11
12
13
import requests
from decord import VideoReader, cpu
from PIL import Image
Ying Sheng's avatar
Ying Sheng committed
14
15
16

from sglang.srt.hf_transformers_utils import get_tokenizer
from sglang.srt.utils import kill_child_process
Yineng Zhang's avatar
Yineng Zhang committed
17
from sglang.test.test_utils import DEFAULT_URL_FOR_UNIT_TEST, popen_launch_server
Ying Sheng's avatar
Ying Sheng committed
18
19


20
# python3 -m sglang.launch_server --model-path lmms-lab/llava-onevision-qwen2-72b-ov --tokenizer-path lmms-lab/llavanext-qwen-siglip-tokenizer --port=30000 --host=127.0.0.1 --tp-size=8 --chat-template=chatml-llava --chunked-prefill-size=16384
Ying Sheng's avatar
Ying Sheng committed
21
22
23
class TestOpenAIVisionServer(unittest.TestCase):
    """End-to-end tests for image and video chat completions.

    One server process is launched for the whole class with
    ``popen_launch_server`` and every test queries it through the ``openai``
    client pointed at the server's ``/v1`` base URL.
    """

    # Sample picture used by the image-based tests.
    _EXAMPLE_IMAGE_URL = "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"

    @classmethod
    def setUpClass(cls):
        """Launch the model server shared by every test in this class."""
        cls.model = "lmms-lab/llava-onevision-qwen2-0.5b-ov"
        cls.base_url = DEFAULT_URL_FOR_UNIT_TEST
        cls.api_key = "sk-123456"
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            api_key=cls.api_key,
            other_args=[
                "--chat-template",
                "chatml-llava",
                "--tokenizer-path",
                "lmms-lab/llavanext-qwen-siglip-tokenizer",
                "--chunked-prefill-size",
                "16384",
                "--log-requests",
            ],
        )
        # The server is launched with the bare URL; the clients created in the
        # tests use the "/v1" prefixed URL.
        cls.base_url += "/v1"

    @classmethod
    def tearDownClass(cls):
        """Pass the launched server's pid to ``kill_child_process``."""
        kill_child_process(cls.process.pid)

    def test_chat_completion(self):
        """Request a short description of the example image and check it."""
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)

        response = client.chat.completions.create(
            model="default",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": self._EXAMPLE_IMAGE_URL},
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in a very short sentence.",
                        },
                    ],
                },
            ],
            temperature=0,
        )

        # unittest assertions are used instead of bare ``assert`` so the
        # checks still run when Python is started with -O.
        self.assertEqual(response.choices[0].message.role, "assistant")
        text = response.choices[0].message.content
        self.assertIsInstance(text, str)
        self.assertTrue("car" in text or "taxi" in text, text)
        self.assertTrue(response.id)
        self.assertTrue(response.created)
        self.assertGreater(response.usage.prompt_tokens, 0)
        self.assertGreater(response.usage.completion_tokens, 0)
        self.assertGreater(response.usage.total_tokens, 0)

    def prepare_video_messages(self, video_path, max_frames_num=32):
        """Build a chat message made of sampled video frames plus a prompt.

        Args:
            video_path: Path of the video file opened with ``VideoReader``.
            max_frames_num: Number of evenly spaced frame indices to sample
                between the first and the last frame.

        Returns:
            A one-element list holding a ``user`` message. Its content is one
            ``image_url`` entry per sampled frame (JPEG encoded, embedded as a
            base64 data URL) followed by a single text prompt.
        """
        vr = VideoReader(video_path, ctx=cpu(0))
        total_frame_num = len(vr)
        frame_idx = np.linspace(
            0, total_frame_num - 1, max_frames_num, dtype=int
        ).tolist()
        frames = vr.get_batch(frame_idx).asnumpy()

        content = []
        for frame in frames:
            buff = io.BytesIO()
            Image.fromarray(frame).save(buff, format="JPEG")
            base64_frame = base64.b64encode(buff.getvalue()).decode("utf-8")
            # Build a brand-new nested dict for every frame. Re-using one
            # template and shallow-copying it shares the inner "image_url"
            # dict, which makes every entry point at the last frame.
            content.append(
                {
                    "type": "image_url",
                    "image_url": {"url": "data:image/jpeg;base64," + base64_frame},
                }
            )

        content.append(
            {"type": "text", "text": "Please describe the video in detail."}
        )

        return [{"role": "user", "content": content}]

    def test_video_chat_completion(self):
        """Stream a description of a sample video and check it is non-empty.

        The video is downloaded to ``~/.cache/jobs.mp4`` on first use and
        re-used from there afterwards.
        """
        url = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4"
        cache_dir = os.path.expanduser("~/.cache")
        file_path = os.path.join(cache_dir, "jobs.mp4")
        os.makedirs(cache_dir, exist_ok=True)

        if not os.path.exists(file_path):
            # A timeout keeps the test from hanging forever on a stalled
            # connection.
            response = requests.get(url, timeout=60)
            response.raise_for_status()

            with open(file_path, "wb") as f:
                f.write(response.content)

        client = openai.Client(api_key=self.api_key, base_url=self.base_url)

        messages = self.prepare_video_messages(file_path)

        video_request = client.chat.completions.create(
            model="default",
            messages=messages,
            temperature=0,
            max_tokens=1024,
            stream=True,
        )
        print("-" * 30)
        pieces = []

        for chunk in video_request:
            # Ignore chunks that carry no choice or no text delta.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                pieces.append(content)
                # Echo the stream as it arrives to ease debugging.
                sys.stdout.write(content)
                sys.stdout.flush()
        print("-" * 30)

        video_response = "".join(pieces)

        # The streamed answer must contain some text.
        self.assertGreater(len(video_response), 0)

    def test_regex(self):
        """Constrain the answer with a regex and check it parses as JSON."""
        client = openai.Client(api_key=self.api_key, base_url=self.base_url)

        # Pattern for a two-field JSON object: a word-like "color" string and
        # an integer "number_of_cars".
        regex = (
            r"""\{\n"""
            + r"""   "color": "[\w]+",\n"""
            + r"""   "number_of_cars": [\d]+\n"""
            + r"""\}"""
        )

        response = client.chat.completions.create(
            model="default",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": self._EXAMPLE_IMAGE_URL},
                        },
                        {
                            "type": "text",
                            "text": "Describe this image in the JSON format.",
                        },
                    ],
                },
            ],
            temperature=0,
            # The pattern is sent to the server as an extra request field.
            extra_body={"regex": regex},
        )
        text = response.choices[0].message.content

        try:
            js_obj = json.loads(text)
        except (TypeError, json.decoder.JSONDecodeError):
            # Show the offending output before letting the test fail.
            print("JSONDecodeError", text)
            raise
        self.assertIsInstance(js_obj["color"], str)
        self.assertIsInstance(js_obj["number_of_cars"], int)

Ying Sheng's avatar
Ying Sheng committed
200
201

if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()