Unverified commit 66e7dcaf, authored by Kaichen Zhang - NTU, committed by GitHub
Browse files

[Fix] Fixing the multi-images error for llava-onevision (#1205)

parent bc4c7a35
...@@ -78,6 +78,51 @@ def image_stream_request_test(client): ...@@ -78,6 +78,51 @@ def image_stream_request_test(client):
print("-" * 30) print("-" * 30)
def multi_image_stream_request_test(client):
    """Stream a chat completion for a two-image prompt and echo it to stdout.

    Sends one user message made of two ``image_url`` parts followed by a text
    instruction, asks for a streamed response, and writes every text delta to
    stdout as soon as it arrives. A banner is printed before the request and a
    dashed separator after the stream ends.

    Args:
        client: Client object exposing ``chat.completions.create`` (in this
            file it is built by ``create_openai_client``).
    """
    print(
        "----------------------Multi-Images Stream Request Test----------------------"
    )
    stream_request = client.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
                        },
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
                        },
                    },
                    {
                        "type": "text",
                        "text": "I have shown you two images. Please describe the two images to me.",
                    },
                ],
            },
        ],
        temperature=0.7,
        max_tokens=1024,
        stream=True,
    )
    for chunk in stream_request:
        # A chunk may arrive without any choices (e.g. a usage-only chunk);
        # skip it rather than failing with IndexError on choices[0].
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is not None:
            # Flush per delta so the text appears incrementally.
            sys.stdout.write(content)
            sys.stdout.flush()
    print("-" * 30)
def video_stream_request_test(client, video_path): def video_stream_request_test(client, video_path):
print("------------------------Video Stream Request Test----------------------") print("------------------------Video Stream Request Test----------------------")
messages = prepare_video_messages(video_path) messages = prepare_video_messages(video_path)
...@@ -209,6 +254,7 @@ def main(): ...@@ -209,6 +254,7 @@ def main():
client = create_openai_client("http://127.0.0.1:30000/v1") client = create_openai_client("http://127.0.0.1:30000/v1")
image_stream_request_test(client) image_stream_request_test(client)
multi_image_stream_request_test(client)
video_stream_request_test(client, video_path) video_stream_request_test(client, video_path)
image_speed_test(client) image_speed_test(client)
video_speed_test(client, video_path) video_speed_test(client, video_path)
......
...@@ -744,7 +744,9 @@ def get_pixel_values( ...@@ -744,7 +744,9 @@ def get_pixel_values(
image, image,
tuple(int(x * 255) for x in processor.image_processor.image_mean), tuple(int(x * 255) for x in processor.image_processor.image_mean),
) )
pixel_values = processor.image_processor(image)["pixel_values"][0] pixel_values = processor.image_processor(image.convert("RGB"))[
"pixel_values"
][0]
elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio: elif image_aspect_ratio == "anyres" or "anyres_max" in image_aspect_ratio:
pixel_values = process_anyres_image( pixel_values = process_anyres_image(
image, processor.image_processor, image_grid_pinpoints image, processor.image_processor, image_grid_pinpoints
......
...@@ -74,6 +74,48 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -74,6 +74,48 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.usage.completion_tokens > 0 assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0 assert response.usage.total_tokens > 0
def test_mult_images_chat_completion(self):
    """Check a non-streamed chat completion for a prompt with two images.

    The reply must come from the assistant, be a string that mentions
    "logo" and either "man" or "cab", and carry a non-empty id, creation
    time, and positive token usage counters.
    """
    logo_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/assets/logo.png"
        },
    }
    example_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://raw.githubusercontent.com/sgl-project/sglang/main/test/lang/example_image.png"
        },
    }
    question_part = {
        "type": "text",
        "text": "I have shown you two images. Please describe the two images to me.",
    }

    api = openai.Client(api_key=self.api_key, base_url=self.base_url)
    completion = api.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [logo_part, example_part, question_part],
            },
        ],
        temperature=0,
    )

    # Content checks: both images must be reflected in the description.
    assert completion.choices[0].message.role == "assistant"
    answer = completion.choices[0].message.content
    assert isinstance(answer, str)
    assert any(keyword in answer for keyword in ("man", "cab")), answer
    assert "logo" in answer, answer

    # Response metadata checks.
    assert completion.id
    assert completion.created
    for counter in ("prompt_tokens", "completion_tokens", "total_tokens"):
        assert getattr(completion.usage, counter) > 0
def prepare_video_messages(self, video_path): def prepare_video_messages(self, video_path):
max_frames_num = 32 max_frames_num = 32
vr = VideoReader(video_path, ctx=cpu(0)) vr = VideoReader(video_path, ctx=cpu(0))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment