Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
66b809cc
Commit
66b809cc
authored
Feb 08, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.7.2' into v0.7.2-dev
parents
37b63c24
0408efc6
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
113 additions
and
11 deletions
+113
-11
examples/offline_inference/torchrun_example.py
examples/offline_inference/torchrun_example.py
+1
-0
examples/offline_inference/tpu.py
examples/offline_inference/tpu.py
+2
-0
examples/offline_inference/vision_language.py
examples/offline_inference/vision_language.py
+28
-11
examples/offline_inference/vision_language_embedding.py
examples/offline_inference/vision_language_embedding.py
+1
-0
examples/offline_inference/vision_language_multi_image.py
examples/offline_inference/vision_language_multi_image.py
+59
-0
examples/offline_inference/whisper.py
examples/offline_inference/whisper.py
+2
-0
examples/online_serving/api_client.py
examples/online_serving/api_client.py
+1
-0
examples/online_serving/cohere_rerank_client.py
examples/online_serving/cohere_rerank_client.py
+1
-0
examples/online_serving/gradio_openai_chatbot_webserver.py
examples/online_serving/gradio_openai_chatbot_webserver.py
+2
-0
examples/online_serving/gradio_webserver.py
examples/online_serving/gradio_webserver.py
+2
-0
examples/online_serving/jinaai_rerank_client.py
examples/online_serving/jinaai_rerank_client.py
+1
-0
examples/online_serving/openai_chat_completion_client.py
examples/online_serving/openai_chat_completion_client.py
+2
-0
examples/online_serving/openai_chat_completion_client_for_multimodal.py
...e_serving/openai_chat_completion_client_for_multimodal.py
+1
-0
examples/online_serving/openai_chat_completion_client_with_tools.py
...nline_serving/openai_chat_completion_client_with_tools.py
+1
-0
examples/online_serving/openai_chat_completion_structured_outputs.py
...line_serving/openai_chat_completion_structured_outputs.py
+2
-0
examples/online_serving/openai_chat_completion_with_reasoning.py
...s/online_serving/openai_chat_completion_with_reasoning.py
+1
-0
examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
...erving/openai_chat_completion_with_reasoning_streaming.py
+1
-0
examples/online_serving/openai_chat_embedding_client_for_multimodal.py
...ne_serving/openai_chat_embedding_client_for_multimodal.py
+2
-0
examples/online_serving/openai_completion_client.py
examples/online_serving/openai_completion_client.py
+2
-0
examples/online_serving/openai_cross_encoder_score.py
examples/online_serving/openai_cross_encoder_score.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
examples/offline_inference/torchrun_example.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
experimental support for tensor-parallel inference with torchrun,
see https://github.com/vllm-project/vllm/issues/11400 for
...
...
examples/offline_inference/tpu.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
vllm
import
LLM
,
SamplingParams
prompts
=
[
...
...
examples/offline_inference/vision_language.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This example shows how to use vLLM for running offline inference with
the correct prompt format on vision language models for text generation.
...
...
@@ -530,18 +531,33 @@ def run_qwen2_vl(question: str, modality: str):
return
llm
,
prompt
,
stop_token_ids
# GLM-4v
def
run_glm4v
(
question
:
str
,
modality
:
str
):
assert
modality
==
"image"
model_name
=
"THUDM/glm-4v-9b"
# Qwen2.5-VL
def
run_qwen2_5_vl
(
question
:
str
,
modality
:
str
):
llm
=
LLM
(
model
=
model_name
,
max_model_len
=
2048
,
max_num_seqs
=
2
,
trust_remote_code
=
True
,
enforce_eager
=
True
)
prompt
=
question
stop_token_ids
=
[
151329
,
151336
,
151338
]
model_name
=
"Qwen/Qwen2.5-VL-3B-Instruct"
llm
=
LLM
(
model
=
model_name
,
max_model_len
=
4096
,
max_num_seqs
=
5
,
mm_processor_kwargs
=
{
"min_pixels"
:
28
*
28
,
"max_pixels"
:
1280
*
28
*
28
,
"fps"
:
1
,
},
disable_mm_preprocessor_cache
=
args
.
disable_mm_preprocessor_cache
,
)
if
modality
==
"image"
:
placeholder
=
"<|image_pad|>"
elif
modality
==
"video"
:
placeholder
=
"<|video_pad|>"
prompt
=
(
"<|im_start|>system
\n
You are a helpful assistant.<|im_end|>
\n
"
f
"<|im_start|>user
\n
<|vision_start|>
{
placeholder
}
<|vision_end|>"
f
"
{
question
}
<|im_end|>
\n
"
"<|im_start|>assistant
\n
"
)
stop_token_ids
=
None
return
llm
,
prompt
,
stop_token_ids
...
...
@@ -571,6 +587,7 @@ model_example_map = {
"pixtral_hf"
:
run_pixtral_hf
,
"qwen_vl"
:
run_qwen_vl
,
"qwen2_vl"
:
run_qwen2_vl
,
"qwen2_5_vl"
:
run_qwen2_5_vl
,
}
...
...
examples/offline_inference/vision_language_embedding.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This example shows how to use vLLM for running offline inference with
the correct prompt format on vision language models for multimodal embedding.
...
...
examples/offline_inference/vision_language_multi_image.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
This example shows how to use vLLM for running offline inference with
multi-image input on vision language models for text generation,
...
...
@@ -391,6 +392,63 @@ def load_qwen2_vl(question, image_urls: List[str]) -> ModelRequestData:
)
def load_qwen2_5_vl(question, image_urls: List[str]) -> ModelRequestData:
    """Prepare a multi-image request for Qwen2.5-VL.

    Builds the ``LLM`` engine, renders the chat prompt through the model's
    ``AutoProcessor`` chat template, and loads the image inputs.

    ``qwen_vl_utils`` is an optional dependency. When it is importable its
    ``process_vision_info`` helper produces the image inputs from the chat
    messages and the engine is created with ``max_model_len=4096``. When it
    is missing, a warning is printed, every URL is loaded with
    ``fetch_image`` and the engine is created with ``max_model_len=32768``.

    Args:
        question: Text placed after the images in the user turn.
        image_urls: One URL per image; its length is also used as the
            per-prompt image limit.

    Returns:
        A ``ModelRequestData`` with the engine, the rendered prompt, the
        image inputs, no stop token ids and no chat template override.
    """
    # Optional helper: fall back to plain image fetching when unavailable.
    try:
        from qwen_vl_utils import process_vision_info
    except ModuleNotFoundError:
        print('WARNING: `qwen-vl-utils` not installed, input images will not '
              'be automatically resized. You can enable this functionality by '
              '`pip install qwen-vl-utils`.')
        process_vision_info = None

    resizer_available = process_vision_info is not None

    model_name = "Qwen/Qwen2.5-VL-3B-Instruct"

    # Use the larger context window when the resize helper is unavailable.
    context_length = 4096 if resizer_available else 32768
    llm = LLM(
        model=model_name,
        max_model_len=context_length,
        max_num_seqs=5,
        limit_mm_per_prompt={"image": len(image_urls)},
    )

    # Chat messages: a fixed system turn, then one user turn holding every
    # image entry followed by the question text.
    image_entries = [{"type": "image", "image": url} for url in image_urls]
    question_entry = {"type": "text", "text": question}
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant.",
    }
    user_turn = {
        "role": "user",
        "content": image_entries + [question_entry],
    }
    messages = [system_turn, user_turn]

    processor = AutoProcessor.from_pretrained(model_name)
    prompt = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    if resizer_available:
        # Only the first element of the returned pair (the images) is used.
        image_data, _ = process_vision_info(messages,
                                            return_video_sample_fps=False)
    else:
        image_data = [fetch_image(url) for url in image_urls]

    return ModelRequestData(
        llm=llm,
        prompt=prompt,
        stop_token_ids=None,
        image_data=image_data,
        chat_template=None,
    )
model_example_map
=
{
"aria"
:
load_aria
,
"deepseek_vl_v2"
:
load_deepseek_vl2
,
...
...
@@ -403,6 +461,7 @@ model_example_map = {
"pixtral_hf"
:
load_pixtral_hf
,
"qwen_vl_chat"
:
load_qwen_vl_chat
,
"qwen2_vl"
:
load_qwen2_vl
,
"qwen2_5_vl"
:
load_qwen2_5_vl
,
}
...
...
examples/offline_inference/whisper.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
time
from
vllm
import
LLM
,
SamplingParams
...
...
examples/online_serving/api_client.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""Example Python client for `vllm.entrypoints.api_server`
NOTE: The API server is used only for demonstration and simple performance
benchmarks. It is not intended for production use.
...
...
examples/online_serving/cohere_rerank_client.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Example of using the OpenAI entrypoint's rerank API which is compatible with
the Cohere SDK: https://github.com/cohere-ai/cohere-python
...
...
examples/online_serving/gradio_openai_chatbot_webserver.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
gradio
as
gr
...
...
examples/online_serving/gradio_webserver.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
json
...
...
examples/online_serving/jinaai_rerank_client.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Example of using the OpenAI entrypoint's rerank API which is compatible with
Jina and Cohere https://jina.ai/reranker
...
...
examples/online_serving/openai_chat_completion_client.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
openai
import
OpenAI
# Modify OpenAI's API key and API base to use vLLM's API server.
...
...
examples/online_serving/openai_chat_completion_client_for_multimodal.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""An example showing how to use vLLM to serve multimodal models
and run online serving with OpenAI client.
...
...
examples/online_serving/openai_chat_completion_client_with_tools.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Set up this example by starting a vLLM OpenAI-compatible server with tool call
options enabled. For example:
...
...
examples/online_serving/openai_chat_completion_structured_outputs.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
enum
import
Enum
from
openai
import
OpenAI
...
...
examples/online_serving/openai_chat_completion_with_reasoning.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
An example showing how to generate chat completions from reasoning models
like DeepSeekR1.
...
...
examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
An example showing how to generate chat completions from reasoning models
like DeepSeekR1.
...
...
examples/online_serving/openai_chat_embedding_client_for_multimodal.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
import
argparse
import
base64
import
io
...
...
examples/online_serving/openai_completion_client.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
from
openai
import
OpenAI
# Modify OpenAI's API key and API base to use vLLM's API server.
...
...
examples/online_serving/openai_cross_encoder_score.py
View file @
66b809cc
# SPDX-License-Identifier: Apache-2.0
"""
Example online usage of Score API.
...
...
Prev
1
2
3
4
5
6
7
8
9
10
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment