Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
583e9009
Unverified
Commit
583e9009
authored
Apr 24, 2025
by
Rui Qiao
Committed by
GitHub
Apr 24, 2025
Browse files
[Misc] Add example to run DeepSeek with Ray Serve LLM (#17134)
Signed-off-by:
Rui Qiao
<
ruisearch42@gmail.com
>
parent
05e1fbfc
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
44 additions
and
0 deletions
+44
-0
examples/online_serving/ray_serve_deepseek.py
examples/online_serving/ray_serve_deepseek.py
+44
-0
No files found.
examples/online_serving/ray_serve_deepseek.py
0 → 100644
View file @
583e9009
# SPDX-License-Identifier: Apache-2.0
"""
Example to deploy DeepSeek R1 or V3 with Ray Serve LLM.
See Ray Serve LLM documentation at:
https://docs.ray.io/en/latest/serve/llm/serving-llms.html
Run `python3 ray_serve_deepseek.py` to deploy the model.
"""
from
ray
import
serve
from
ray.serve.llm
import
LLMConfig
,
LLMRouter
,
LLMServer
# Configuration for the single LLM deployment served by this example.
llm_config = LLMConfig(
    model_loading_config={
        "model_id": "deepseek",
        # Replace with the directory the model was downloaded to.
        "model_source": "/path/to/the/model",
    },
    deployment_config={
        # min == max, so the replica count is pinned at one.
        "autoscaling_config": {
            "min_replicas": 1,
            "max_replicas": 1,
        },
    },
    # Replace with the accelerator type available on the node.
    accelerator_type="H100",
    runtime_env={"env_vars": {"VLLM_USE_V1": "1"}},
    # Engine arguments (e.g. vLLM engine kwargs); adjust as needed.
    # NOTE(review): tensor_parallel_size=8 with pipeline_parallel_size=2
    # presumably requires 16 GPUs in total -- confirm against the cluster.
    engine_kwargs={
        "tensor_parallel_size": 8,
        "pipeline_parallel_size": 2,
        "gpu_memory_utilization": 0.92,
        "dtype": "auto",
        "max_num_seqs": 40,
        "max_model_len": 16384,
        "enable_chunked_prefill": True,
        "enable_prefix_caching": True,
        "trust_remote_code": True,
    },
)
# Build the server deployment from the config, put it behind the router,
# and start the application.
_serve_options = llm_config.get_serve_options(name_prefix="vLLM:")
deployment = LLMServer.as_deployment(_serve_options).bind(llm_config)
llm_app = LLMRouter.as_deployment().bind([deployment])
serve.run(llm_app)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment