# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment "sgl-dsr1-16gpu".
#
# Declares three services that all run the sglang-runtime:1.0.0 image:
#   - Frontend: one replica, componentType "frontend".
#   - decode:   one worker replica launched with --disaggregation-mode decode.
#   - prefill:  one worker replica launched with --disaggregation-mode prefill.
#
# Every service mounts the "model-cache" PVC at /opt/model, which is also the
# value given to HF_HOME below.
---
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: sgl-dsr1-16gpu
spec:
  envs:
    # Same path as the model-cache mountPoint used by every service.
    - name: HF_HOME
      value: /opt/model
  pvcs:
    # create is false, so the claim is presumably expected to exist before
    # this deployment is applied -- confirm against the operator docs.
    - name: model-cache
      create: false
  services:
    Frontend:
      componentType: frontend
      replicas: 1
      volumeMounts:
        - name: model-cache
          mountPoint: /opt/model
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0

    decode:
      componentType: worker
      subComponentType: decode
      replicas: 1
      # nodeCount 2 with a GPU limit of 8 lines up with --tp 16 below,
      # assuming the limit applies per node -- TODO confirm.
      multinode:
        nodeCount: 2
      resources:
        limits:
          gpu: "8"
      volumeMounts:
        - name: model-cache
          mountPoint: /opt/model
      sharedMemory:
        size: 80Gi
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
          workingDir: /sgl-workspace/dynamo
          command:
            - python3
            - -m
            - dynamo.sglang
          # Flag/value pairs are listed in order; numeric values are quoted
          # so they stay strings instead of being typed as YAML numbers.
          args:
            - --model-path
            - deepseek-ai/DeepSeek-R1
            - --served-model-name
            - deepseek-ai/DeepSeek-R1
            - --tp
            - "16"
            - --dp
            - "16"
            - --enable-dp-attention
            - --ep-size
            - "16"
            - --trust-remote-code
            - --skip-tokenizer-init
            - --disaggregation-mode
            - decode
            # Same bootstrap port value as the prefill worker.
            - --disaggregation-bootstrap-port
            - "30001"
            - --mem-fraction-static
            - "0.75"
            - --host
            - 0.0.0.0
            - --prefill-round-robin-balance
            - --watchdog-timeout
            - "3600"

    prefill:
      componentType: worker
      subComponentType: prefill
      replicas: 1
      # Same node count and GPU limit as the decode worker.
      multinode:
        nodeCount: 2
      resources:
        limits:
          gpu: "8"
      volumeMounts:
        - name: model-cache
          mountPoint: /opt/model
      sharedMemory:
        size: 80Gi
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.0.0
          workingDir: /sgl-workspace/dynamo
          command:
            - python3
            - -m
            - dynamo.sglang
          # NOTE(review): unlike decode, this list has no --dp or
          # --enable-dp-attention and uses --load-balance-method round_robin
          # instead of --prefill-round-robin-balance -- confirm the asymmetry
          # is intentional.
          args:
            - --model-path
            - deepseek-ai/DeepSeek-R1
            - --served-model-name
            - deepseek-ai/DeepSeek-R1
            - --tp
            - "16"
            - --ep-size
            - "16"
            - --trust-remote-code
            - --skip-tokenizer-init
            - --disaggregation-mode
            - prefill
            # Same bootstrap port value as the decode worker.
            - --disaggregation-bootstrap-port
            - "30001"
            - --mem-fraction-static
            - "0.75"
            - --host
            - 0.0.0.0
            - --load-balance-method
            - round_robin
            - --watchdog-timeout
            - "3600"