Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
cc7f22a8
Commit
cc7f22a8
authored
Jun 11, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.9.1' into v0.9.1-ori
parents
b9ea0c09
b6553be1
Changes
1000
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
19 additions
and
31 deletions
+19
-31
examples/online_serving/openai_chat_completion_structured_outputs.py
...line_serving/openai_chat_completion_structured_outputs.py
+1
-1
examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
...enai_chat_completion_structured_outputs_structural_tag.py
+1
-0
examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
...enai_chat_completion_structured_outputs_with_reasoning.py
+1
-0
examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
...rving/openai_chat_completion_tool_calls_with_reasoning.py
+1
-0
examples/online_serving/openai_chat_completion_with_reasoning.py
...s/online_serving/openai_chat_completion_with_reasoning.py
+1
-0
examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
...erving/openai_chat_completion_with_reasoning_streaming.py
+1
-0
examples/online_serving/openai_chat_embedding_client_for_multimodal.py
...ne_serving/openai_chat_embedding_client_for_multimodal.py
+1
-0
examples/online_serving/openai_classification_client.py
examples/online_serving/openai_classification_client.py
+1
-0
examples/online_serving/openai_completion_client.py
examples/online_serving/openai_completion_client.py
+1
-0
examples/online_serving/openai_cross_encoder_score.py
examples/online_serving/openai_cross_encoder_score.py
+1
-0
examples/online_serving/openai_embedding_client.py
examples/online_serving/openai_embedding_client.py
+1
-0
examples/online_serving/openai_embedding_matryoshka_fy.py
examples/online_serving/openai_embedding_matryoshka_fy.py
+1
-0
examples/online_serving/openai_pooling_client.py
examples/online_serving/openai_pooling_client.py
+1
-0
examples/online_serving/openai_transcription_client.py
examples/online_serving/openai_transcription_client.py
+1
-0
examples/online_serving/opentelemetry/dummy_client.py
examples/online_serving/opentelemetry/dummy_client.py
+1
-0
examples/online_serving/prometheus_grafana/grafana.json
examples/online_serving/prometheus_grafana/grafana.json
+0
-30
examples/online_serving/prompt_embed_inference_with_openai_client.py
...line_serving/prompt_embed_inference_with_openai_client.py
+1
-0
examples/online_serving/ray_serve_deepseek.py
examples/online_serving/ray_serve_deepseek.py
+1
-0
examples/online_serving/retrieval_augmented_generation_with_langchain.py
..._serving/retrieval_augmented_generation_with_langchain.py
+1
-0
examples/online_serving/retrieval_augmented_generation_with_llamaindex.py
...serving/retrieval_augmented_generation_with_llamaindex.py
+1
-0
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
examples/online_serving/openai_chat_completion_structured_outputs.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
To run this example, you need to start the vLLM server:
...
...
@@ -138,7 +139,6 @@ def extra_backend_options_completion(client: OpenAI, model: str):
extra_body
=
{
"guided_regex"
:
r
"\w+@\w+\.com\n"
,
"stop"
:
[
"
\n
"
],
"guided_decoding_backend"
:
"xgrammar"
,
"guided_decoding_disable_fallback"
:
True
,
},
)
...
...
examples/online_serving/openai_chat_completion_structured_outputs_structural_tag.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
openai
import
OpenAI
# This example demonstrates the `structural_tag` response format.
...
...
examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example shows how to generate structured outputs from reasoning models
like DeepSeekR1. The thinking process will not be guided by the JSON
...
...
examples/online_serving/openai_chat_completion_tool_calls_with_reasoning.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example demonstrates how to use tool calling with reasoning models
like QwQ-32B. The reasoning_content will not be parsed by the tool
...
...
examples/online_serving/openai_chat_completion_with_reasoning.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example shows how to generate chat completions from reasoning models
like DeepSeekR1.
...
...
examples/online_serving/openai_chat_completion_with_reasoning_streaming.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example shows how to generate chat completions from reasoning models
like DeepSeekR1.
...
...
examples/online_serving/openai_chat_embedding_client_for_multimodal.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
argparse
import
base64
...
...
examples/online_serving/openai_classification_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
argparse
import
pprint
...
...
examples/online_serving/openai_completion_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
argparse
...
...
examples/online_serving/openai_cross_encoder_score.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Example online usage of Score API.
...
...
examples/online_serving/openai_embedding_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from
openai
import
OpenAI
...
...
examples/online_serving/openai_embedding_matryoshka_fy.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Example Python client for embedding API dimensions using vLLM API server
NOTE:
start a supported Matryoshka Embeddings model server with `vllm serve`, e.g.
...
...
examples/online_serving/openai_pooling_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Example online usage of Pooling API.
...
...
examples/online_serving/openai_transcription_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
json
...
...
examples/online_serving/opentelemetry/dummy_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
requests
from
opentelemetry.exporter.otlp.proto.grpc.trace_exporter
import
OTLPSpanExporter
...
...
examples/online_serving/prometheus_grafana/grafana.json
View file @
cc7f22a8
...
...
@@ -577,23 +577,6 @@
"refId"
:
"A"
,
"useBackend"
:
false
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"${DS_PROMETHEUS}"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"vllm:num_requests_swapped{model_name=
\"
$model_name
\"
}"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
true
,
"instant"
:
false
,
"legendFormat"
:
"Num Swapped"
,
"range"
:
true
,
"refId"
:
"B"
,
"useBackend"
:
false
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
...
...
@@ -874,19 +857,6 @@
"legendFormat"
:
"GPU Cache Usage"
,
"range"
:
true
,
"refId"
:
"A"
},
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"${DS_PROMETHEUS}"
},
"editorMode"
:
"code"
,
"expr"
:
"vllm:cpu_cache_usage_perc{model_name=
\"
$model_name
\"
}"
,
"hide"
:
false
,
"instant"
:
false
,
"legendFormat"
:
"CPU Cache Usage"
,
"range"
:
true
,
"refId"
:
"B"
}
],
"title"
:
"Cache Utilization"
,
...
...
examples/online_serving/prompt_embed_inference_with_openai_client.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
vLLM OpenAI-Compatible Client with Prompt Embeddings
...
...
examples/online_serving/ray_serve_deepseek.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
See more details at:
...
...
examples/online_serving/retrieval_augmented_generation_with_langchain.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
Retrieval Augmented Generation (RAG) Implementation with Langchain
==================================================================
...
...
examples/online_serving/retrieval_augmented_generation_with_llamaindex.py
View file @
cc7f22a8
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
RAG (Retrieval Augmented Generation) Implementation with LlamaIndex
================================================================
...
...
Prev
1
…
7
8
9
10
11
12
13
14
15
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment