feat: Add DSR1 configurations (#1298)

6f0ee60d · ptarasiewiczNV · GitHub · ba16ed52 · 6f0ee60d · 6f0ee60d
Unverified Commit 6f0ee60d authored Jun 03, 2025 by ptarasiewiczNV Committed by GitHub Jun 03, 2025
5 changed files
--- a/examples/vllm_v1/README.md
+++ b/examples/vllm_v1/README.md
@@ -64,3 +64,50 @@ curl localhost:8000/v1/completions \

 For more detailed explanations, refer to the main [LLM examples README](../llm/README.md).

+
+
+## DeepSeek R1
+
+To run the DSR1 model, please first follow the Ray setup from the [multinode documentation](../../docs/examples/multinode.md).
+
+### Aggregated Deployment
+
+```bash
+cd examples/vllm_v1
+dynamo serve graphs.agg:Frontend -f configs/deepseek_r1/agg.yaml
+```
+
+
+### Disaggregated Deployment
+
+To create a frontend with a single decode worker:
+```bash
+cd examples/vllm_v1
+dynamo serve graphs.agg:Frontend -f configs/deepseek_r1/disagg.yaml
+```
+
+To create a single decode worker:
+```bash
+cd examples/vllm_v1
+dynamo serve components.worker:VllmDecodeWorker -f configs/deepseek_r1/disagg.yaml
+```
+
+To create a single prefill worker:
+```bash
+cd examples/vllm_v1
+dynamo serve components.worker:VllmPrefillWorker -f configs/deepseek_r1/disagg.yaml
+```
+
+## Testing
+
+Send a test request using curl:
+```bash
+curl localhost:8000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "deepseek-ai/DeepSeek-R1",
+    "prompt": "In the heart of Eldoria...",
+    "stream": false,
+    "max_tokens": 30
+  }'
+```
\ No newline at end of file
--- a/examples/vllm_v1/configs/agg.yaml
+++ b/examples/vllm_v1/configs/agg.yaml
@@ -30,5 +30,5 @@ VllmDecodeWorker:
  ServiceArgs:
    workers: 1
    resources:
-      gpu: 1
+      gpu: '1'
  common-configs: [model, served_model_name]
--- a/examples/vllm_v1/configs/deepseek_r1/agg.yaml
+++ b/examples/vllm_v1/configs/deepseek_r1/agg.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+Common:
+  model: deepseek-ai/DeepSeek-R1
+  served_model_name: deepseek-ai/DeepSeek-R1
+
+Frontend:
+  endpoint: dynamo.SimpleLoadBalancer.generate
+  port: 8000
+  common-configs: [served_model_name]
+
+SimpleLoadBalancer:
+  enable_disagg: false
+  common-configs: [model, served_model_name]
+
+VllmDecodeWorker:
+  tensor_parallel_size: 16
+  ServiceArgs:
+    workers: 1
+    resources:
+      gpu: '16'
+  common-configs: [model, served_model_name]
--- a/examples/vllm_v1/configs/deepseek_r1/disagg.yaml
+++ b/examples/vllm_v1/configs/deepseek_r1/disagg.yaml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+Common:
+  model: deepseek-ai/DeepSeek-R1
+  kv_transfer_config:
+    kv_connector: NixlConnector
+    kv_role: kv_both
+  served_model_name: deepseek-ai/DeepSeek-R1
+
+Frontend:
+  endpoint: dynamo.SimpleLoadBalancer.generate
+  port: 8000
+  common-configs: [served_model_name]
+
+SimpleLoadBalancer:
+  enable_disagg: true
+  common-configs: [model, kv_transfer_config, served_model_name]
+
+VllmPrefillWorker:
+  tensor_parallel_size: 16
+  ServiceArgs:
+    workers: 1
+    resources:
+      gpu: '16'
+  common-configs: [model, kv_transfer_config, served_model_name]
+
+
+VllmDecodeWorker:
+  tensor_parallel_size: 16
+  ServiceArgs:
+    workers: 1
+    resources:
+      gpu: '16'
+  common-configs: [model, kv_transfer_config, served_model_name]
--- a/examples/vllm_v1/configs/disagg.yaml
+++ b/examples/vllm_v1/configs/disagg.yaml
@@ -31,7 +31,7 @@ VllmPrefillWorker:
  ServiceArgs:
    workers: 1
    resources:
-      gpu: 1
+      gpu: '1'
  common-configs: [model, kv-transfer-config, served_model_name]

 VllmDecodeWorker:
@@ -39,5 +39,5 @@ VllmDecodeWorker:
  ServiceArgs:
    workers: 1
    resources:
-      gpu: 1
+      gpu: '1'
  common-configs: [model, kv-transfer-config, served_model_name]