Unverified commit c8770464, authored by hhzhang16 and committed by GitHub
Browse files

feat: normalize dynamo namespace computation (#5231)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent abd4b5d9
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: dynamo
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/sglang-runtime:my-tag image: my-registry/sglang-runtime:my-tag
Planner: Planner:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: planner componentType: planner
replicas: 1 replicas: 1
...@@ -43,7 +41,6 @@ spec: ...@@ -43,7 +41,6 @@ spec:
# See docs/planner/sla_planner_quickstart.md for more details # See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data name: planner-profile-data
decode: decode:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -79,7 +76,6 @@ spec: ...@@ -79,7 +76,6 @@ spec:
- --host - --host
- "0.0.0.0" - "0.0.0.0"
prefill: prefill:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
......
...@@ -29,7 +29,6 @@ metadata: ...@@ -29,7 +29,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-agg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -37,7 +36,6 @@ spec: ...@@ -37,7 +36,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
TRTLLMWorker: TRTLLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-agg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -16,7 +15,6 @@ spec: ...@@ -16,7 +15,6 @@ spec:
image: my-registry/tensorrtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMWorker: TRTLLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-agg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -19,7 +18,6 @@ spec: ...@@ -19,7 +18,6 @@ spec:
value: kv value: kv
TRTLLMWorker: TRTLLMWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg-router
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
......
...@@ -93,7 +93,6 @@ spec: ...@@ -93,7 +93,6 @@ spec:
volumeMounts: volumeMounts:
- name: models - name: models
mountPoint: /models mountPoint: /models
dynamoNamespace: trtllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -111,7 +110,6 @@ spec: ...@@ -111,7 +110,6 @@ spec:
volumeMounts: volumeMounts:
- name: models - name: models
mountPoint: /models mountPoint: /models
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
...@@ -149,7 +147,6 @@ spec: ...@@ -149,7 +147,6 @@ spec:
volumeMounts: volumeMounts:
- name: models - name: models
mountPoint: /models mountPoint: /models
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: my-registry/tensorrtllm-runtime:my-tag image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
...@@ -41,7 +39,6 @@ spec: ...@@ -41,7 +39,6 @@ spec:
- --disaggregation-mode - --disaggregation-mode
- prefill - prefill
TRTLLMDecodeWorker: TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-disagg-planner
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -32,7 +31,6 @@ spec: ...@@ -32,7 +31,6 @@ spec:
- "0.0" - "0.0"
- --no-kv-events - --no-kv-events
Planner: Planner:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: planner componentType: planner
replicas: 1 replicas: 1
...@@ -63,7 +61,6 @@ spec: ...@@ -63,7 +61,6 @@ spec:
# See docs/planner/sla_planner_quickstart.md for more details # See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data name: planner-profile-data
TRTLLMDecodeWorker: TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -104,7 +101,6 @@ spec: ...@@ -104,7 +101,6 @@ spec:
- --disaggregation-mode - --disaggregation-mode
- decode - decode
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: trtllm-v1-disagg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -18,7 +17,6 @@ spec: ...@@ -18,7 +17,6 @@ spec:
- name: DYN_ROUTER_MODE - name: DYN_ROUTER_MODE
value: kv value: kv
TRTLLMPrefillWorker: TRTLLMPrefillWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
...@@ -44,7 +42,6 @@ spec: ...@@ -44,7 +42,6 @@ spec:
- prefill - prefill
- --publish-events-and-metrics - --publish-events-and-metrics
TRTLLMDecodeWorker: TRTLLMDecodeWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-agg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -16,7 +15,6 @@ spec: ...@@ -16,7 +15,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-agg-kvbm
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -16,7 +15,6 @@ spec: ...@@ -16,7 +15,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-kvbm
componentType: worker componentType: worker
replicas: 1 replicas: 1
resources: resources:
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-agg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -19,7 +18,6 @@ spec: ...@@ -19,7 +18,6 @@ spec:
value: kv value: kv
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
......
...@@ -12,7 +12,6 @@ metadata: ...@@ -12,7 +12,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-agg-router-kv-approx
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -31,7 +30,6 @@ spec: ...@@ -31,7 +30,6 @@ spec:
value: kv value: kv
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router-kv-approx
componentType: worker componentType: worker
replicas: 2 replicas: 2
resources: resources:
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -23,7 +22,6 @@ spec: ...@@ -23,7 +22,6 @@ spec:
- --http-port - --http-port
- "8000" - "8000"
decode: decode:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
...@@ -46,7 +44,6 @@ spec: ...@@ -46,7 +44,6 @@ spec:
- --tensor-parallel-size - --tensor-parallel-size
- "2" - "2"
prefill: prefill:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -36,7 +34,6 @@ spec: ...@@ -36,7 +34,6 @@ spec:
- Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
- --is-decode-worker - --is-decode-worker
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg-kvbm
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-kvbm
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
...@@ -37,7 +35,6 @@ spec: ...@@ -37,7 +35,6 @@ spec:
- "32000" - "32000"
- --enforce-eager - --enforce-eager
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg-kvbm
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg-kvbm-2p2d
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-kvbm-2p2d
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
...@@ -37,7 +35,6 @@ spec: ...@@ -37,7 +35,6 @@ spec:
- "32000" - "32000"
- --enforce-eager - --enforce-eager
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg-kvbm-2p2d
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg-kvbm-tp2
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-kvbm-tp2
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
...@@ -43,7 +41,6 @@ spec: ...@@ -43,7 +41,6 @@ spec:
- --tensor-parallel-size - --tensor-parallel-size
- "2" - "2"
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg-kvbm-tp2
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 1 replicas: 1
......
...@@ -8,14 +8,12 @@ metadata: ...@@ -8,14 +8,12 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-disagg-planner
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
mainContainer: mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
Planner: Planner:
dynamoNamespace: vllm-disagg-planner
componentType: planner componentType: planner
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -42,7 +40,6 @@ spec: ...@@ -42,7 +40,6 @@ spec:
# See docs/planner/sla_planner_quickstart.md for more details # See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data name: planner-profile-data
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
...@@ -62,7 +59,6 @@ spec: ...@@ -62,7 +59,6 @@ spec:
- --model - --model
- Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
subComponentType: prefill subComponentType: prefill
......
...@@ -8,7 +8,6 @@ metadata: ...@@ -8,7 +8,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-v1-disagg-router
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -18,7 +17,6 @@ spec: ...@@ -18,7 +17,6 @@ spec:
- name: DYN_ROUTER_MODE - name: DYN_ROUTER_MODE
value: kv value: kv
VllmDecodeWorker: VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
...@@ -38,7 +36,6 @@ spec: ...@@ -38,7 +36,6 @@ spec:
- Qwen/Qwen3-0.6B - Qwen/Qwen3-0.6B
- --is-decode-worker - --is-decode-worker
VllmPrefillWorker: VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
componentType: worker componentType: worker
replicas: 2 replicas: 2
......
...@@ -7,7 +7,6 @@ metadata: ...@@ -7,7 +7,6 @@ metadata:
spec: spec:
services: services:
Frontend: Frontend:
dynamoNamespace: vllm-agg-lora
componentType: frontend componentType: frontend
replicas: 1 replicas: 1
extraPodSpec: extraPodSpec:
...@@ -15,7 +14,6 @@ spec: ...@@ -15,7 +14,6 @@ spec:
image: nvcr.io/nvidian/dynamo-dev/biswa:7e499b5c460f1883a9945d221123e0760051210f-39500608-vllm-amd64 image: nvcr.io/nvidian/dynamo-dev/biswa:7e499b5c460f1883a9945d221123e0760051210f-39500608-vllm-amd64
VllmDecodeWorker: VllmDecodeWorker:
envFromSecret: hf-token-secret envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-lora
componentType: worker componentType: worker
subComponentType: decode subComponentType: decode
replicas: 1 replicas: 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment