Unverified commit 5c04bb8b, authored by David Xia and committed by GitHub
Browse files

[doc] fix multimodal example script (#18089)


Signed-off-by: David Xia <david@davidxia.com>
parent 3d2779c2
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
"""An example showing how to use vLLM to serve multimodal models """An example showing how to use vLLM to serve multimodal models
and run online serving with OpenAI client. and run online serving with OpenAI client.
Launch the vLLM server with the following command: Launch the vLLM server with the following command:
...@@ -12,12 +12,18 @@ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \ ...@@ -12,12 +12,18 @@ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}' --trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
(audio inference with Ultravox) (audio inference with Ultravox)
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096 vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b \
--max-model-len 4096 --trust-remote-code
run the script with
python openai_chat_completion_client_for_multimodal.py --chat-type audio
""" """
import base64 import base64
import requests import requests
from openai import OpenAI from openai import OpenAI
from utils import get_first_model
from vllm.utils import FlexibleArgumentParser from vllm.utils import FlexibleArgumentParser
...@@ -31,9 +37,6 @@ client = OpenAI( ...@@ -31,9 +37,6 @@ client = OpenAI(
base_url=openai_api_base, base_url=openai_api_base,
) )
models = client.models.list()
model = models.data[0].id
def encode_base64_content_from_url(content_url: str) -> str: def encode_base64_content_from_url(content_url: str) -> str:
"""Encode a content retrieved from a remote url to base64 format.""" """Encode a content retrieved from a remote url to base64 format."""
...@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str: ...@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
# Text-only inference # Text-only inference
def run_text_only() -> None: def run_text_only(model: str) -> None:
chat_completion = client.chat.completions.create( chat_completion = client.chat.completions.create(
messages=[{ messages=[{
"role": "user", "role": "user",
...@@ -61,7 +64,7 @@ def run_text_only() -> None: ...@@ -61,7 +64,7 @@ def run_text_only() -> None:
# Single-image input inference # Single-image input inference
def run_single_image() -> None: def run_single_image(model: str) -> None:
## Use image url in the payload ## Use image url in the payload
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
...@@ -117,7 +120,7 @@ def run_single_image() -> None: ...@@ -117,7 +120,7 @@ def run_single_image() -> None:
# Multi-image input inference # Multi-image input inference
def run_multi_image() -> None: def run_multi_image(model: str) -> None:
image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg" image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg" image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
chat_completion_from_url = client.chat.completions.create( chat_completion_from_url = client.chat.completions.create(
...@@ -152,7 +155,7 @@ def run_multi_image() -> None: ...@@ -152,7 +155,7 @@ def run_multi_image() -> None:
# Video input inference # Video input inference
def run_video() -> None: def run_video(model: str) -> None:
video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4" video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
video_base64 = encode_base64_content_from_url(video_url) video_base64 = encode_base64_content_from_url(video_url)
...@@ -208,7 +211,7 @@ def run_video() -> None: ...@@ -208,7 +211,7 @@ def run_video() -> None:
# Audio input inference # Audio input inference
def run_audio() -> None: def run_audio(model: str) -> None:
from vllm.assets.audio import AudioAsset from vllm.assets.audio import AudioAsset
audio_url = AudioAsset("winning_call").url audio_url = AudioAsset("winning_call").url
...@@ -318,7 +321,8 @@ def parse_args(): ...@@ -318,7 +321,8 @@ def parse_args():
def main(args) -> None: def main(args) -> None:
chat_type = args.chat_type chat_type = args.chat_type
example_function_map[chat_type]() model = get_first_model(client)
example_function_map[chat_type](model)
if __name__ == "__main__": if __name__ == "__main__":
......
# SPDX-License-Identifier: Apache-2.0
from openai import APIConnectionError, OpenAI
from openai.pagination import SyncPage
from openai.types.model import Model
def get_first_model(client: OpenAI) -> str:
    """Return the ID of the first model listed by the vLLM server.

    Args:
        client: OpenAI client whose ``base_url`` points at the vLLM server.

    Returns:
        The ``id`` of the first entry in the server's model list.

    Raises:
        RuntimeError: If the server cannot be reached, or if it reports
            no models.
    """
    try:
        models: SyncPage[Model] = client.models.list()
    except APIConnectionError as e:
        # The API key is deliberately kept out of the message: exception
        # text ends up in tracebacks and logs, where credentials must not
        # appear. The checklist still reminds the user to verify it.
        raise RuntimeError(
            "Failed to get the list of models from the vLLM server at "
            f"{client.base_url}. Check\n"
            "1. the server is running\n"
            "2. the server URL is correct\n"
            "3. the API key is correct") from e

    if not models.data:
        raise RuntimeError(
            f"No models found on the vLLM server at {client.base_url}")

    return models.data[0].id
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment