# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment named vllm-disagg-planner. Its services (a frontend, a
# planner, and separate decode and prefill vLLM workers) are declared under
# spec.services and all share the vllm-disagg-planner Dynamo namespace.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: vllm-disagg-planner
spec:
  services:
    # Frontend service: a single replica of the vLLM runtime image, with no
    # command/args override.
    Frontend:
      dynamoNamespace: vllm-disagg-planner
      componentType: frontend
      replicas: 1
      extraPodSpec:
        mainContainer:
          # NOTE(review): "my-tag" looks like a placeholder tag — confirm it is
          # meant to be replaced with a real image tag before deploying.
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
    # Planner service: a single replica that runs `python3 -m planner_sla` from
    # the planner source directory, reading profiling results from a ConfigMap
    # mounted read-only into the container.
    Planner:
      dynamoNamespace: vllm-disagg-planner
      componentType: planner
      replicas: 1
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/components/src/dynamo/planner
          # List items are indented under their key, matching every other
          # sequence in this file.
          command:
            - python3
            - -m
            - planner_sla
          args:
            - --environment=kubernetes
            - --backend=vllm
            - --adjustment-interval=60
            # Must stay in sync with the mountPath of the planner-profile-data
            # volume mount below.
            - --profile-results-dir=/workspace/profiling_results
          volumeMounts:
            - name: planner-profile-data
              mountPath: /workspace/profiling_results
              readOnly: true
        volumes:
          - name: planner-profile-data
            configMap:
              # Must be pre-created by the profiler before deployment.
              # See docs/planner/sla_planner_quickstart.md for more details.
              name: planner-profile-data
    # Decode worker: a single replica limited to one GPU that runs
    # `python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B`.
    VllmDecodeWorker:
      dynamoNamespace: vllm-disagg-planner
      # NOTE(review): presumably supplies the Hugging Face token used to fetch
      # the model — confirm the secret exists in the target namespace.
      envFromSecret: hf-token-secret
      componentType: worker
      subComponentType: decode
      replicas: 1
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/examples/backends/vllm
          command:
            - python3
          # Same arguments as VllmPrefillWorker, minus --is-prefill-worker.
          args:
            - -m
            - dynamo.vllm
            - --model
            - Qwen/Qwen3-0.6B
    # Prefill worker: a single replica limited to one GPU that runs
    # `python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker`.
    VllmPrefillWorker:
      dynamoNamespace: vllm-disagg-planner
      # NOTE(review): presumably supplies the Hugging Face token used to fetch
      # the model — confirm the secret exists in the target namespace.
      envFromSecret: hf-token-secret
      componentType: worker
      subComponentType: prefill
      replicas: 1
      resources:
        limits:
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
          workingDir: /workspace/examples/backends/vllm
          command:
            - python3
          # Same arguments as VllmDecodeWorker, plus --is-prefill-worker.
          args:
            - -m
            - dynamo.vllm
            - --model
            - Qwen/Qwen3-0.6B
            - --is-prefill-worker