# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# DynamoGraphDeployment named vllm-disagg; the services it runs are listed
# under spec.services.
apiVersion: nvidia.com/v1alpha1
kind: DynamoGraphDeployment
metadata:
  name: vllm-disagg
spec:
  services:
    # Frontend service: a single replica started with
    # `python3 -m dynamo.frontend --http-port 8000`.
    Frontend:
      dynamoNamespace: vllm-disagg
      componentType: frontend
      replicas: 1
      resources:
        # Requests and limits are deliberately written with identical values.
        requests:
          cpu: "32"
          memory: "10Gi"
        limits:
          cpu: "32"
          memory: "10Gi"
      extraPodSpec:
        mainContainer:
          # NOTE(review): the worker services in this file pin the same image
          # tag; presumably they should be bumped together - confirm.
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          # `/bin/sh -c` receives the single args entry below as its command
          # string, so the whole command line is kept in one quoted scalar.
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.frontend --http-port 8000"
    # Decode worker: a single replica started with
    # `python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B` (no prefill flag).
    VllmDecodeWorker:
      dynamoNamespace: vllm-disagg
      # NOTE(review): presumably this secret supplies the Hugging Face token
      # used to download the model - confirm the secret exists in the
      # target namespace.
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
      resources:
        # Requests and limits are deliberately written with identical values,
        # including one GPU.
        requests:
          cpu: "32"
          memory: "40Gi"
          gpu: "1"
        limits:
          cpu: "32"
          memory: "40Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          # `/bin/sh -c` receives the single args entry below as its command
          # string, so the whole command line is kept in one quoted scalar.
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B"
    # Prefill worker: a single replica started with
    # `python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker`.
    VllmPrefillWorker:
      dynamoNamespace: vllm-disagg
      # NOTE(review): presumably this secret supplies the Hugging Face token
      # used to download the model - confirm the secret exists in the
      # target namespace.
      envFromSecret: hf-token-secret
      componentType: worker
      replicas: 1
      resources:
        # Requests and limits are deliberately written with identical values,
        # including one GPU.
        requests:
          cpu: "32"
          memory: "40Gi"
          gpu: "1"
        limits:
          cpu: "32"
          memory: "40Gi"
          gpu: "1"
      extraPodSpec:
        mainContainer:
          image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
          workingDir: /workspace/components/backends/vllm
          # `/bin/sh -c` receives the single args entry below as its command
          # string; the `--is-prefill-worker` flag is the only difference
          # from the decode worker's command line.
          command:
            - /bin/sh
            - -c
          args:
            - "python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --is-prefill-worker"