Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5c04bb8b
Unverified
Commit
5c04bb8b
authored
May 16, 2025
by
David Xia
Committed by
GitHub
May 16, 2025
Browse files
[doc] fix multimodal example script (#18089)
Signed-off-by:
David Xia
<
david@davidxia.com
>
parent
3d2779c2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
40 additions
and
11 deletions
+40
-11
examples/online_serving/openai_chat_completion_client_for_multimodal.py
...e_serving/openai_chat_completion_client_for_multimodal.py
+15
-11
examples/online_serving/utils.py
examples/online_serving/utils.py
+25
-0
No files found.
examples/online_serving/openai_chat_completion_client_for_multimodal.py
View file @
5c04bb8b
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
"""An example showing how to use vLLM to serve multimodal models
"""An example showing how to use vLLM to serve multimodal models
and run online serving with OpenAI client.
and run online serving with OpenAI client.
Launch the vLLM server with the following command:
Launch the vLLM server with the following command:
...
@@ -12,12 +12,18 @@ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
...
@@ -12,12 +12,18 @@ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
(audio inference with Ultravox)
(audio inference with Ultravox)
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b
\
--max-model-len 4096 --trust-remote-code
run the script with
python openai_chat_completion_client_for_multimodal.py --chat-type audio
"""
"""
import
base64
import
base64
import
requests
import
requests
from
openai
import
OpenAI
from
openai
import
OpenAI
from
utils
import
get_first_model
from
vllm.utils
import
FlexibleArgumentParser
from
vllm.utils
import
FlexibleArgumentParser
...
@@ -31,9 +37,6 @@ client = OpenAI(
...
@@ -31,9 +37,6 @@ client = OpenAI(
base_url
=
openai_api_base
,
base_url
=
openai_api_base
,
)
)
models
=
client
.
models
.
list
()
model
=
models
.
data
[
0
].
id
def
encode_base64_content_from_url
(
content_url
:
str
)
->
str
:
def
encode_base64_content_from_url
(
content_url
:
str
)
->
str
:
"""Encode a content retrieved from a remote url to base64 format."""
"""Encode a content retrieved from a remote url to base64 format."""
...
@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
...
@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
# Text-only inference
# Text-only inference
def
run_text_only
()
->
None
:
def
run_text_only
(
model
:
str
)
->
None
:
chat_completion
=
client
.
chat
.
completions
.
create
(
chat_completion
=
client
.
chat
.
completions
.
create
(
messages
=
[{
messages
=
[{
"role"
:
"user"
,
"role"
:
"user"
,
...
@@ -61,7 +64,7 @@ def run_text_only() -> None:
...
@@ -61,7 +64,7 @@ def run_text_only() -> None:
# Single-image input inference
# Single-image input inference
def
run_single_image
()
->
None
:
def
run_single_image
(
model
:
str
)
->
None
:
## Use image url in the payload
## Use image url in the payload
image_url
=
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
image_url
=
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
...
@@ -117,7 +120,7 @@ def run_single_image() -> None:
...
@@ -117,7 +120,7 @@ def run_single_image() -> None:
# Multi-image input inference
# Multi-image input inference
def
run_multi_image
()
->
None
:
def
run_multi_image
(
model
:
str
)
->
None
:
image_url_duck
=
"https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
image_url_duck
=
"https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
image_url_lion
=
"https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
image_url_lion
=
"https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
chat_completion_from_url
=
client
.
chat
.
completions
.
create
(
chat_completion_from_url
=
client
.
chat
.
completions
.
create
(
...
@@ -152,7 +155,7 @@ def run_multi_image() -> None:
...
@@ -152,7 +155,7 @@ def run_multi_image() -> None:
# Video input inference
# Video input inference
def
run_video
()
->
None
:
def
run_video
(
model
:
str
)
->
None
:
video_url
=
"http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
video_url
=
"http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
video_base64
=
encode_base64_content_from_url
(
video_url
)
video_base64
=
encode_base64_content_from_url
(
video_url
)
...
@@ -208,7 +211,7 @@ def run_video() -> None:
...
@@ -208,7 +211,7 @@ def run_video() -> None:
# Audio input inference
# Audio input inference
def
run_audio
()
->
None
:
def
run_audio
(
model
:
str
)
->
None
:
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
audio_url
=
AudioAsset
(
"winning_call"
).
url
audio_url
=
AudioAsset
(
"winning_call"
).
url
...
@@ -318,7 +321,8 @@ def parse_args():
...
@@ -318,7 +321,8 @@ def parse_args():
def
main
(
args
)
->
None
:
def
main
(
args
)
->
None
:
chat_type
=
args
.
chat_type
chat_type
=
args
.
chat_type
example_function_map
[
chat_type
]()
model
=
get_first_model
(
client
)
example_function_map
[
chat_type
](
model
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
examples/online_serving/utils.py
0 → 100644
View file @
5c04bb8b
# SPDX-License-Identifier: Apache-2.0
from
openai
import
APIConnectionError
,
OpenAI
from
openai.pagination
import
SyncPage
from
openai.types.model
import
Model
def get_first_model(client: OpenAI) -> str:
    """Return the ID of the first model served by the vLLM server.

    Args:
        client: OpenAI client configured with the server's base URL and
            API key.

    Returns:
        The ``id`` of the first entry in the model list returned by
        ``client.models.list()``.

    Raises:
        RuntimeError: If the server cannot be reached (chained from the
            underlying ``APIConnectionError``), or if the server reports
            no models.
    """
    try:
        models: SyncPage[Model] = client.models.list()
    except APIConnectionError as e:
        # Do not echo the credential verbatim: exception text ends up in
        # tracebacks, CI output and logs. Show only a short suffix of
        # longer keys so the user can still tell which key was used.
        api_key = client.api_key or ""
        redacted_key = f"***{api_key[-4:]}" if len(api_key) > 8 else "***"
        raise RuntimeError(
            "Failed to get the list of models from the vLLM server at "
            f"{client.base_url} with API key {redacted_key}. Check\n"
            "1. the server is running\n"
            "2. the server URL is correct\n"
            "3. the API key is correct") from e

    # Truthiness check covers both an empty list and a missing payload.
    if not models.data:
        raise RuntimeError(
            f"No models found on the vLLM server at {client.base_url}")

    return models.data[0].id
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment