# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment "trtllm-disagg-planner".
#
# Declares five services that share the dynamoNamespace "trtllm-disagg-planner":
#   Frontend            - runs `python3 -m dynamo.frontend` on HTTP port 8000
#   Planner             - runs `python3 -m planner_sla` with --backend=trtllm
#   Prometheus          - runs `python3 -m dynamo.planner.prometheus`
#   TRTLLMDecodeWorker  - runs `python3 -m dynamo.trtllm --disaggregation-mode decode`
#   TRTLLMPrefillWorker - runs `python3 -m dynamo.trtllm --disaggregation-mode prefill`
#
# Prerequisites visible in this manifest:
#   - a secret named "hf-token-secret" (referenced via envFromSecret)
#   - a pre-created PVC named "dynamo-pvc" holding the SLA profiling results
#
# NOTE(review): every service uses the image tag "hzhou-0909-03" from
# nvcr.io/nvidian/dynamo-dev, which looks like a personal development build.
# Confirm the intended image before reusing this manifest.

apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: trtllm-disagg-planner
spec:
  envs:
    # JSON service configuration. Its "Prometheus" section sets a 5s scrape
    # interval and two scrape jobs: "prometheus" (localhost:8000) and
    # "frontend" (trtllm-disagg-planner-frontend:8000).
    # The value is kept on one line so the string stays byte-identical.
    - name: DYNAMO_SERVICE_CONFIG
      value: '{"Prometheus":{"global":{"scrape_interval":"5s"},"scrape_configs":[{"job_name":"prometheus","static_configs":[{"targets":["localhost:8000"]}]},{"job_name":"frontend","static_configs":[{"targets":["trtllm-disagg-planner-frontend:8000"]}]}]}}'
    - name: DYNAMO_NAMESPACE
      value: "trtllm-disagg-planner"

  services:
    Frontend:
      dynamoNamespace: trtllm-disagg-planner
      componentType: frontend
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
          args:
            - -m
            - dynamo.frontend
            - --http-port
            - "8000"
            - --kv-cache-block-size
            - "128"
            - --router-mode
            - kv
            - --kv-overlap-score-weight
            - "0.0"
            - --router-temperature
            - "0.0"
            - --no-kv-events

    Planner:
      dynamoNamespace: trtllm-disagg-planner
      envFromSecret: hf-token-secret
      componentType: planner
      replicas: 1
      envs:
        # NOTE(review): 8000 matches the scrape target port in
        # DYNAMO_SERVICE_CONFIG, while the planner's own metrics port below is
        # 9085. Presumably this is the Prometheus server port - confirm.
        - name: PROMETHEUS_PORT
          value: "8000"
      # Both probes run `/bin/sh -c "exit 0"`, which always exits 0, so they
      # pass whenever the command can be executed in the container.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 60
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      pvc:
        create: false
        # Must be pre-created before deployment, and the SLA profiler must
        # already have been run.
        name: dynamo-pvc
        mountPoint: /workspace/profiling_results
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
          workingDir: /workspace/components/planner/src/dynamo/planner
          ports:
            # Same port number as --prometheus-port in args below.
            - name: metrics
              containerPort: 9085
          command:
            - python3
          args:
            - -m
            - planner_sla
            - --environment=kubernetes
            - --backend=trtllm
            - --adjustment-interval=60
            # Same path as pvc.mountPoint above.
            - --profile-results-dir=/workspace/profiling_results
            - --prometheus-port=9085

    # NOTE: componentType is set to "frontend" on Prometheus to ensure a
    # service is created for the Prometheus component. This is a workaround
    # and should be managed differently.
    Prometheus:
      dynamoNamespace: trtllm-disagg-planner
      componentType: frontend
      replicas: 1
      envs:
        - name: PYTHONPATH
          value: "/workspace/components/planner/src"
        - name: PROMETHEUS_PORT
          value: "8000"
      # Both probes run `/bin/sh -c "exit 0"`, which always exits 0, so they
      # pass whenever the command can be executed in the container.
      livenessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      readinessProbe:
        exec:
          command:
            - /bin/sh
            - -c
            - "exit 0"
        initialDelaySeconds: 30
        periodSeconds: 60
        timeoutSeconds: 30
        failureThreshold: 10
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
          args:
            - -m
            - dynamo.planner.prometheus

    TRTLLMDecodeWorker:
      dynamoNamespace: trtllm-disagg-planner
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
      # failureThreshold is 1 with a 5s period: a single failed /live check
      # counts as a liveness failure.
      livenessProbe:
        httpGet:
          path: /live
          port: 9090
        periodSeconds: 5
        timeoutSeconds: 30
        failureThreshold: 1
      readinessProbe:
        httpGet:
          path: /health
          port: 9090
        periodSeconds: 10
        timeoutSeconds: 30
        failureThreshold: 60
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        terminationGracePeriodSeconds: 600
        mainContainer:
          # Up to 60 checks, 10s apart, against /health during startup.
          startupProbe:
            httpGet:
              path: /health
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
          args:
            - -m
            - dynamo.trtllm
            - --model-path
            - Qwen/Qwen3-0.6B
            - --served-model-name
            - Qwen/Qwen3-0.6B
            # Relative path; resolved against workingDir above.
            - --extra-engine-args
            - engine_configs/decode.yaml
            - --disaggregation-mode
            - decode
            - --disaggregation-strategy
            - decode_first

    # NOTE(review): unlike TRTLLMDecodeWorker, this service declares no
    # livenessProbe or readinessProbe, only a startupProbe. Confirm that this
    # is intentional.
    TRTLLMPrefillWorker:
      dynamoNamespace: trtllm-disagg-planner
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        terminationGracePeriodSeconds: 600
        mainContainer:
          # Up to 60 checks, 10s apart, against /health during startup.
          startupProbe:
            httpGet:
              path: /health
              port: 9090
            periodSeconds: 10
            failureThreshold: 60
          image: nvcr.io/nvidian/dynamo-dev/dynamo-trtllm-runtime:hzhou-0909-03
          workingDir: /workspace/components/backends/trtllm
          command:
            - python3
          args:
            - -m
            - dynamo.trtllm
            - --model-path
            - Qwen/Qwen3-0.6B
            - --served-model-name
            - Qwen/Qwen3-0.6B
            # Relative path; resolved against workingDir above.
            - --extra-engine-args
            - engine_configs/prefill.yaml
            - --disaggregation-mode
            - prefill
            - --disaggregation-strategy
            - decode_first