Unverified commit c8770464, authored by hhzhang16 and committed by GitHub
Browse files

feat: normalize dynamo namespace computation (#5231)


Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
parent abd4b5d9
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: dynamo
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
Planner:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
......@@ -43,7 +41,6 @@ spec:
# See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data
decode:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -79,7 +76,6 @@ spec:
- --host
- "0.0.0.0"
prefill:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -29,7 +29,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: trtllm-agg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -37,7 +36,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:my-tag
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
componentType: worker
replicas: 1
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: trtllm-agg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -16,7 +15,6 @@ spec:
image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
componentType: worker
replicas: 1
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: trtllm-agg-router
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -19,7 +18,6 @@ spec:
value: kv
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg-router
componentType: worker
replicas: 2
resources:
......
......@@ -93,7 +93,6 @@ spec:
volumeMounts:
- name: models
mountPoint: /models
dynamoNamespace: trtllm-disagg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -111,7 +110,6 @@ spec:
volumeMounts:
- name: models
mountPoint: /models
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -149,7 +147,6 @@ spec:
volumeMounts:
- name: models
mountPoint: /models
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: trtllm-disagg
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: my-registry/tensorrtllm-runtime:my-tag
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......@@ -41,7 +39,6 @@ spec:
- --disaggregation-mode
- prefill
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: trtllm-disagg-planner
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -32,7 +31,6 @@ spec:
- "0.0"
- --no-kv-events
Planner:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret
componentType: planner
replicas: 1
......@@ -63,7 +61,6 @@ spec:
# See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -104,7 +101,6 @@ spec:
- --disaggregation-mode
- decode
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: trtllm-v1-disagg-router
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -18,7 +17,6 @@ spec:
- name: DYN_ROUTER_MODE
value: kv
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......@@ -44,7 +42,6 @@ spec:
- prefill
- --publish-events-and-metrics
TRTLLMDecodeWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -16,7 +15,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
componentType: worker
replicas: 1
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg-kvbm
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -16,7 +15,6 @@ spec:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-kvbm
componentType: worker
replicas: 1
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg-router
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -19,7 +18,6 @@ spec:
value: kv
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router
componentType: worker
replicas: 2
resources:
......
......@@ -12,7 +12,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg-router-kv-approx
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -31,7 +30,6 @@ spec:
value: kv
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router-kv-approx
componentType: worker
replicas: 2
resources:
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -23,7 +22,6 @@ spec:
- --http-port
- "8000"
decode:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -46,7 +44,6 @@ spec:
- --tensor-parallel-size
- "2"
prefill:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -36,7 +34,6 @@ spec:
- Qwen/Qwen3-0.6B
- --is-decode-worker
VllmPrefillWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg-kvbm
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-kvbm
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -37,7 +35,6 @@ spec:
- "32000"
- --enforce-eager
VllmPrefillWorker:
dynamoNamespace: vllm-disagg-kvbm
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg-kvbm-2p2d
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-kvbm-2p2d
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......@@ -37,7 +35,6 @@ spec:
- "32000"
- --enforce-eager
VllmPrefillWorker:
dynamoNamespace: vllm-disagg-kvbm-2p2d
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg-kvbm-tp2
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-kvbm-tp2
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......@@ -43,7 +41,6 @@ spec:
- --tensor-parallel-size
- "2"
VllmPrefillWorker:
dynamoNamespace: vllm-disagg-kvbm-tp2
envFromSecret: hf-token-secret
componentType: worker
replicas: 1
......
......@@ -8,14 +8,12 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-disagg-planner
componentType: frontend
replicas: 1
extraPodSpec:
mainContainer:
image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:my-tag
Planner:
dynamoNamespace: vllm-disagg-planner
componentType: planner
replicas: 1
extraPodSpec:
......@@ -42,7 +40,6 @@ spec:
# See docs/planner/sla_planner_quickstart.md for more details
name: planner-profile-data
VllmDecodeWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: decode
......@@ -62,7 +59,6 @@ spec:
- --model
- Qwen/Qwen3-0.6B
VllmPrefillWorker:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
componentType: worker
subComponentType: prefill
......
......@@ -8,7 +8,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-v1-disagg-router
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -18,7 +17,6 @@ spec:
- name: DYN_ROUTER_MODE
value: kv
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......@@ -38,7 +36,6 @@ spec:
- Qwen/Qwen3-0.6B
- --is-decode-worker
VllmPrefillWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
componentType: worker
replicas: 2
......
......@@ -7,7 +7,6 @@ metadata:
spec:
services:
Frontend:
dynamoNamespace: vllm-agg-lora
componentType: frontend
replicas: 1
extraPodSpec:
......@@ -15,7 +14,6 @@ spec:
image: nvcr.io/nvidian/dynamo-dev/biswa:7e499b5c460f1883a9945d221123e0760051210f-39500608-vllm-amd64
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-lora
componentType: worker
subComponentType: decode
replicas: 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment