Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
5c04bb8b
Unverified
Commit
5c04bb8b
authored
May 16, 2025
by
David Xia
Committed by
GitHub
May 16, 2025
Browse files
[doc] fix multimodal example script (#18089)
Signed-off-by:
David Xia
<
david@davidxia.com
>
parent
3d2779c2
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
40 additions
and
11 deletions
+40
-11
examples/online_serving/openai_chat_completion_client_for_multimodal.py
...e_serving/openai_chat_completion_client_for_multimodal.py
+15
-11
examples/online_serving/utils.py
examples/online_serving/utils.py
+25
-0
No files found.
examples/online_serving/openai_chat_completion_client_for_multimodal.py
View file @
5c04bb8b
...
...
@@ -12,12 +12,18 @@ vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
(audio inference with Ultravox)
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b
\
--max-model-len 4096 --trust-remote-code
run the script with
python openai_chat_completion_client_for_multimodal.py --chat-type audio
"""
import
base64
import
requests
from
openai
import
OpenAI
from
utils
import
get_first_model
from
vllm.utils
import
FlexibleArgumentParser
...
...
@@ -31,9 +37,6 @@ client = OpenAI(
base_url
=
openai_api_base
,
)
models
=
client
.
models
.
list
()
model
=
models
.
data
[
0
].
id
def
encode_base64_content_from_url
(
content_url
:
str
)
->
str
:
"""Encode a content retrieved from a remote url to base64 format."""
...
...
@@ -46,7 +49,7 @@ def encode_base64_content_from_url(content_url: str) -> str:
# Text-only inference
def
run_text_only
()
->
None
:
def
run_text_only
(
model
:
str
)
->
None
:
chat_completion
=
client
.
chat
.
completions
.
create
(
messages
=
[{
"role"
:
"user"
,
...
...
@@ -61,7 +64,7 @@ def run_text_only() -> None:
# Single-image input inference
def
run_single_image
()
->
None
:
def
run_single_image
(
model
:
str
)
->
None
:
## Use image url in the payload
image_url
=
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
...
...
@@ -117,7 +120,7 @@ def run_single_image() -> None:
# Multi-image input inference
def
run_multi_image
()
->
None
:
def
run_multi_image
(
model
:
str
)
->
None
:
image_url_duck
=
"https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg"
image_url_lion
=
"https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg"
chat_completion_from_url
=
client
.
chat
.
completions
.
create
(
...
...
@@ -152,7 +155,7 @@ def run_multi_image() -> None:
# Video input inference
def
run_video
()
->
None
:
def
run_video
(
model
:
str
)
->
None
:
video_url
=
"http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4"
video_base64
=
encode_base64_content_from_url
(
video_url
)
...
...
@@ -208,7 +211,7 @@ def run_video() -> None:
# Audio input inference
def
run_audio
()
->
None
:
def
run_audio
(
model
:
str
)
->
None
:
from
vllm.assets.audio
import
AudioAsset
audio_url
=
AudioAsset
(
"winning_call"
).
url
...
...
@@ -318,7 +321,8 @@ def parse_args():
def
main
(
args
)
->
None
:
chat_type
=
args
.
chat_type
example_function_map
[
chat_type
]()
model
=
get_first_model
(
client
)
example_function_map
[
chat_type
](
model
)
if
__name__
==
"__main__"
:
...
...
examples/online_serving/utils.py
0 → 100644
View file @
5c04bb8b
# SPDX-License-Identifier: Apache-2.0
from
openai
import
APIConnectionError
,
OpenAI
from
openai.pagination
import
SyncPage
from
openai.types.model
import
Model
def get_first_model(client: OpenAI) -> str:
    """Return the ID of the first model served by the vLLM server.

    Args:
        client: OpenAI client configured with the vLLM server's base URL
            and API key.

    Returns:
        The ``id`` of the first entry in the server's model list.

    Raises:
        RuntimeError: If the server cannot be reached (chained from the
            underlying ``APIConnectionError``) or if it reports no models.
    """
    try:
        models: SyncPage[Model] = client.models.list()
    except APIConnectionError as e:
        # Exception text routinely ends up in logs, tracebacks and bug
        # reports, so never echo the full credential. A short suffix is
        # enough to tell which key was used; short keys are hidden entirely
        # because a suffix would reveal most of them.
        api_key = client.api_key or ""
        if len(api_key) > 8:
            key_hint = f"ending in ...{api_key[-4:]}"
        else:
            key_hint = "(redacted)"
        raise RuntimeError(
            "Failed to get the list of models from the vLLM server at "
            f"{client.base_url} with API key {key_hint}. Check\n"
            "1. the server is running\n"
            "2. the server URL is correct\n"
            "3. the API key is correct") from e

    # A reachable server that serves nothing is reported explicitly instead
    # of surfacing as an IndexError on the lookup below.
    if not models.data:
        raise RuntimeError(
            f"No models found on the vLLM server at {client.base_url}")

    return models.data[0].id
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment